diff --git "a/trainer_state (1).json" "b/trainer_state (1).json" new file mode 100644--- /dev/null +++ "b/trainer_state (1).json" @@ -0,0 +1,60388 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 2500, + "global_step": 100000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.9999999999999996e-07, + "loss": 10.5991, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-07, + "loss": 10.4404, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 7e-07, + "loss": 10.4079, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 9.499999999999999e-07, + "loss": 10.3433, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.2e-06, + "loss": 10.2894, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.4499999999999999e-06, + "loss": 10.2258, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 1.7e-06, + "loss": 10.1618, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 1.9499999999999995e-06, + "loss": 10.0841, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 2.1999999999999997e-06, + "loss": 10.0074, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 2.4499999999999994e-06, + "loss": 9.9546, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.6999999999999996e-06, + "loss": 9.8959, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 2.9499999999999997e-06, + "loss": 9.8438, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 3.1999999999999994e-06, + "loss": 9.7693, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 3.4499999999999996e-06, + "loss": 9.6931, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 3.6999999999999997e-06, + "loss": 9.6457, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 3.9499999999999995e-06, + "loss": 9.5779, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 4.2e-06, + "loss": 9.5133, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 4.45e-06, + "loss": 9.4415, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 4.699999999999999e-06, + "loss": 9.3703, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 4.95e-06, + "loss": 9.2958, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 5.199999999999999e-06, + "loss": 9.2097, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 5.4499999999999995e-06, + "loss": 9.1562, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 5.7e-06, + "loss": 9.0652, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 5.95e-06, + "loss": 9.0147, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 6.199999999999999e-06, + "loss": 8.917, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 6.449999999999999e-06, + "loss": 8.86, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 6.699999999999999e-06, + "loss": 8.7804, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 6.949999999999999e-06, + "loss": 8.6979, + "step": 280 + }, + { + "epoch": 0.0, + "learning_rate": 7.2e-06, + "loss": 8.6123, + "step": 290 + }, + { + "epoch": 0.0, + "learning_rate": 7.449999999999999e-06, + "loss": 8.5626, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 7.699999999999999e-06, + "loss": 8.4648, + "step": 310 + }, + { + "epoch": 0.0, + "learning_rate": 7.949999999999998e-06, + "loss": 8.4083, + "step": 320 + }, + { + "epoch": 0.0, + "learning_rate": 8.2e-06, + "loss": 8.3222, + "step": 330 + }, + { + "epoch": 0.0, + "learning_rate": 8.449999999999999e-06, + "loss": 8.2542, + "step": 340 + }, + { + "epoch": 0.0, + "learning_rate": 8.7e-06, + "loss": 8.1903, + "step": 350 + }, + { + "epoch": 0.0, + "learning_rate": 8.949999999999999e-06, + "loss": 8.1185, + "step": 360 + }, + { + "epoch": 0.0, + "learning_rate": 9.199999999999998e-06, + "loss": 8.0568, + "step": 370 + }, + { + "epoch": 0.0, + "learning_rate": 9.45e-06, + "loss": 7.9776, + "step": 380 + }, + { + "epoch": 0.0, + "learning_rate": 9.699999999999999e-06, + "loss": 7.9201, + "step": 390 + }, + { + "epoch": 0.0, + "learning_rate": 9.949999999999998e-06, + "loss": 7.84, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 1.02e-05, + "loss": 7.788, + "step": 410 + }, + { + "epoch": 0.0, + "learning_rate": 1.0425e-05, + "loss": 7.708, + "step": 420 + }, + { + "epoch": 0.0, + "learning_rate": 1.0675e-05, + "loss": 7.6345, + "step": 430 + }, + { + "epoch": 0.0, + "learning_rate": 1.0925e-05, + "loss": 7.5975, + "step": 440 + }, + { + "epoch": 0.0, + "learning_rate": 1.1174999999999999e-05, + "loss": 7.5093, + "step": 450 + }, + { + "epoch": 0.0, + "learning_rate": 1.1424999999999998e-05, + "loss": 7.4443, + "step": 460 + }, + { + "epoch": 0.0, + "learning_rate": 1.1675e-05, + "loss": 7.3872, + "step": 470 + }, + { + "epoch": 0.0, + "learning_rate": 1.1924999999999998e-05, + "loss": 7.3158, + "step": 480 + }, + { + "epoch": 0.0, + "learning_rate": 1.2175e-05, + "loss": 7.2681, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 1.2424999999999999e-05, + "loss": 7.2233, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 1.2675e-05, + "loss": 7.1814, + "step": 510 + }, + { + "epoch": 0.01, + "learning_rate": 1.2924999999999999e-05, + "loss": 7.1009, + "step": 520 + }, + { + "epoch": 0.01, + "learning_rate": 1.3174999999999998e-05, + "loss": 7.0456, + "step": 530 + }, + { + "epoch": 0.01, + "learning_rate": 1.3424999999999998e-05, + "loss": 7.0051, + "step": 540 + }, + { + "epoch": 0.01, + "learning_rate": 1.3674999999999997e-05, + "loss": 6.9287, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 1.3925e-05, + "loss": 6.858, + "step": 560 + }, + { + "epoch": 0.01, + "learning_rate": 1.4174999999999999e-05, + "loss": 6.8178, + "step": 570 + }, + { + "epoch": 0.01, + "learning_rate": 1.4424999999999998e-05, + "loss": 6.8199, + "step": 580 + }, + { + "epoch": 0.01, + "learning_rate": 1.4674999999999998e-05, + "loss": 6.7664, + "step": 590 + }, + { + "epoch": 0.01, + "learning_rate": 1.4925e-05, + "loss": 6.6982, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 1.5175e-05, + "loss": 6.6649, + "step": 610 + }, + { + "epoch": 0.01, + "learning_rate": 1.5425e-05, + "loss": 6.6193, + "step": 620 + }, + { + "epoch": 0.01, + "learning_rate": 1.5674999999999998e-05, + "loss": 6.5672, + "step": 630 + }, + { + "epoch": 0.01, + "learning_rate": 1.5925e-05, + "loss": 6.5679, + "step": 640 + }, + { + "epoch": 0.01, + "learning_rate": 1.6175e-05, + "loss": 6.5134, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 1.6425e-05, + "loss": 6.4569, + "step": 660 + }, + { + "epoch": 0.01, + "learning_rate": 1.6675e-05, + "loss": 6.4107, + "step": 670 + }, + { + "epoch": 0.01, + "learning_rate": 1.6924999999999998e-05, + "loss": 6.396, + "step": 680 + }, + { + "epoch": 0.01, + "learning_rate": 1.7175e-05, + "loss": 6.3522, + "step": 690 + }, + { + "epoch": 0.01, + "learning_rate": 1.74e-05, + "loss": 6.3442, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 1.765e-05, + "loss": 6.3664, + "step": 710 + }, + { + "epoch": 0.01, + "learning_rate": 1.7899999999999998e-05, + "loss": 6.2891, + "step": 720 + }, + { + "epoch": 0.01, + "learning_rate": 1.8149999999999997e-05, + "loss": 6.2448, + "step": 730 + }, + { + "epoch": 0.01, + "learning_rate": 1.8399999999999997e-05, + "loss": 6.1858, + "step": 740 + }, + { + "epoch": 0.01, + "learning_rate": 1.865e-05, + "loss": 6.1695, + "step": 750 + }, + { + "epoch": 0.01, + "learning_rate": 1.89e-05, + "loss": 6.1665, + "step": 760 + }, + { + "epoch": 0.01, + "learning_rate": 1.915e-05, + "loss": 6.0991, + "step": 770 + }, + { + "epoch": 0.01, + "learning_rate": 1.9399999999999997e-05, + "loss": 6.08, + "step": 780 + }, + { + "epoch": 0.01, + "learning_rate": 1.965e-05, + "loss": 6.0533, + "step": 790 + }, + { + "epoch": 0.01, + "learning_rate": 1.9899999999999996e-05, + "loss": 6.0263, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 2.015e-05, + "loss": 5.9932, + "step": 810 + }, + { + "epoch": 0.01, + "learning_rate": 2.04e-05, + "loss": 5.962, + "step": 820 + }, + { + "epoch": 0.01, + "learning_rate": 2.0649999999999997e-05, + "loss": 5.9409, + "step": 830 + }, + { + "epoch": 0.01, + "learning_rate": 2.09e-05, + "loss": 5.919, + "step": 840 + }, + { + "epoch": 0.01, + "learning_rate": 2.1149999999999996e-05, + "loss": 5.9305, + "step": 850 + }, + { + "epoch": 0.01, + "learning_rate": 2.14e-05, + "loss": 5.9071, + "step": 860 + }, + { + "epoch": 0.01, + "learning_rate": 2.1649999999999998e-05, + "loss": 5.8489, + "step": 870 + }, + { + "epoch": 0.01, + "learning_rate": 2.1899999999999997e-05, + "loss": 5.8171, + "step": 880 + }, + { + "epoch": 0.01, + "learning_rate": 2.215e-05, + "loss": 5.7521, + "step": 890 + }, + { + "epoch": 0.01, + "learning_rate": 2.24e-05, + "loss": 5.748, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 2.2649999999999998e-05, + "loss": 5.7184, + "step": 910 + }, + { + "epoch": 0.01, + "learning_rate": 2.2899999999999998e-05, + "loss": 5.7007, + "step": 920 + }, + { + "epoch": 0.01, + "learning_rate": 2.3149999999999997e-05, + "loss": 5.7113, + "step": 930 + }, + { + "epoch": 0.01, + "learning_rate": 2.34e-05, + "loss": 5.6782, + "step": 940 + }, + { + "epoch": 0.01, + "learning_rate": 2.365e-05, + "loss": 5.6706, + "step": 950 + }, + { + "epoch": 0.01, + "learning_rate": 2.3899999999999998e-05, + "loss": 5.6403, + "step": 960 + }, + { + "epoch": 0.01, + "learning_rate": 2.4149999999999997e-05, + "loss": 5.6065, + "step": 970 + }, + { + "epoch": 0.01, + "learning_rate": 2.4399999999999997e-05, + "loss": 5.5808, + "step": 980 + }, + { + "epoch": 0.01, + "learning_rate": 2.4649999999999996e-05, + "loss": 5.5483, + "step": 990 + }, + { + "epoch": 0.01, + "learning_rate": 2.49e-05, + "loss": 5.5431, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 2.5149999999999998e-05, + "loss": 5.5711, + "step": 1010 + }, + { + "epoch": 0.01, + "learning_rate": 2.5399999999999997e-05, + "loss": 5.5249, + "step": 1020 + }, + { + "epoch": 0.01, + "learning_rate": 2.565e-05, + "loss": 5.483, + "step": 1030 + }, + { + "epoch": 0.01, + "learning_rate": 2.5899999999999996e-05, + "loss": 5.4368, + "step": 1040 + }, + { + "epoch": 0.01, + "learning_rate": 2.615e-05, + "loss": 5.4181, + "step": 1050 + }, + { + "epoch": 0.01, + "learning_rate": 2.6399999999999995e-05, + "loss": 5.4194, + "step": 1060 + }, + { + "epoch": 0.01, + "learning_rate": 2.6649999999999997e-05, + "loss": 5.3955, + "step": 1070 + }, + { + "epoch": 0.01, + "learning_rate": 2.69e-05, + "loss": 5.4054, + "step": 1080 + }, + { + "epoch": 0.01, + "learning_rate": 2.7149999999999996e-05, + "loss": 5.356, + "step": 1090 + }, + { + "epoch": 0.01, + "learning_rate": 2.74e-05, + "loss": 5.3373, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 2.7649999999999994e-05, + "loss": 5.289, + "step": 1110 + }, + { + "epoch": 0.01, + "learning_rate": 2.7899999999999997e-05, + "loss": 5.3229, + "step": 1120 + }, + { + "epoch": 0.01, + "learning_rate": 2.815e-05, + "loss": 5.2827, + "step": 1130 + }, + { + "epoch": 0.01, + "learning_rate": 2.8399999999999996e-05, + "loss": 5.2503, + "step": 1140 + }, + { + "epoch": 0.01, + "learning_rate": 2.865e-05, + "loss": 5.179, + "step": 1150 + }, + { + "epoch": 0.01, + "learning_rate": 2.89e-05, + "loss": 5.1685, + "step": 1160 + }, + { + "epoch": 0.01, + "learning_rate": 2.9149999999999997e-05, + "loss": 5.2519, + "step": 1170 + }, + { + "epoch": 0.01, + "learning_rate": 2.94e-05, + "loss": 5.2074, + "step": 1180 + }, + { + "epoch": 0.01, + "learning_rate": 2.9649999999999996e-05, + "loss": 5.172, + "step": 1190 + }, + { + "epoch": 0.01, + "learning_rate": 2.99e-05, + "loss": 5.1738, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 3.0149999999999998e-05, + "loss": 5.1721, + "step": 1210 + }, + { + "epoch": 0.01, + "learning_rate": 3.0399999999999997e-05, + "loss": 5.1373, + "step": 1220 + }, + { + "epoch": 0.01, + "learning_rate": 3.065e-05, + "loss": 5.0675, + "step": 1230 + }, + { + "epoch": 0.01, + "learning_rate": 3.09e-05, + "loss": 5.0579, + "step": 1240 + }, + { + "epoch": 0.01, + "learning_rate": 3.115e-05, + "loss": 4.9938, + "step": 1250 + }, + { + "epoch": 0.01, + "learning_rate": 3.14e-05, + "loss": 5.0288, + "step": 1260 + }, + { + "epoch": 0.01, + "learning_rate": 3.165e-05, + "loss": 5.0438, + "step": 1270 + }, + { + "epoch": 0.01, + "learning_rate": 3.1899999999999996e-05, + "loss": 4.9984, + "step": 1280 + }, + { + "epoch": 0.01, + "learning_rate": 3.2149999999999995e-05, + "loss": 5.0142, + "step": 1290 + }, + { + "epoch": 0.01, + "learning_rate": 3.2399999999999995e-05, + "loss": 4.9647, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 3.265e-05, + "loss": 4.9983, + "step": 1310 + }, + { + "epoch": 0.01, + "learning_rate": 3.289999999999999e-05, + "loss": 4.9675, + "step": 1320 + }, + { + "epoch": 0.01, + "learning_rate": 3.315e-05, + "loss": 4.9787, + "step": 1330 + }, + { + "epoch": 0.01, + "learning_rate": 3.34e-05, + "loss": 4.9638, + "step": 1340 + }, + { + "epoch": 0.01, + "learning_rate": 3.365e-05, + "loss": 4.8791, + "step": 1350 + }, + { + "epoch": 0.01, + "learning_rate": 3.39e-05, + "loss": 4.8835, + "step": 1360 + }, + { + "epoch": 0.01, + "learning_rate": 3.4149999999999997e-05, + "loss": 4.8899, + "step": 1370 + }, + { + "epoch": 0.01, + "learning_rate": 3.4399999999999996e-05, + "loss": 4.8351, + "step": 1380 + }, + { + "epoch": 0.01, + "learning_rate": 3.465e-05, + "loss": 4.8162, + "step": 1390 + }, + { + "epoch": 0.01, + "learning_rate": 3.4899999999999995e-05, + "loss": 4.8588, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 3.515e-05, + "loss": 4.8019, + "step": 1410 + }, + { + "epoch": 0.01, + "learning_rate": 3.539999999999999e-05, + "loss": 4.8557, + "step": 1420 + }, + { + "epoch": 0.01, + "learning_rate": 3.565e-05, + "loss": 4.8064, + "step": 1430 + }, + { + "epoch": 0.01, + "learning_rate": 3.59e-05, + "loss": 4.7612, + "step": 1440 + }, + { + "epoch": 0.01, + "learning_rate": 3.615e-05, + "loss": 4.7071, + "step": 1450 + }, + { + "epoch": 0.01, + "learning_rate": 3.64e-05, + "loss": 4.727, + "step": 1460 + }, + { + "epoch": 0.01, + "learning_rate": 3.6649999999999996e-05, + "loss": 4.686, + "step": 1470 + }, + { + "epoch": 0.01, + "learning_rate": 3.6899999999999996e-05, + "loss": 4.6832, + "step": 1480 + }, + { + "epoch": 0.01, + "learning_rate": 3.7149999999999995e-05, + "loss": 4.6862, + "step": 1490 + }, + { + "epoch": 0.01, + "learning_rate": 3.7399999999999994e-05, + "loss": 4.6609, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 3.7649999999999994e-05, + "loss": 4.6903, + "step": 1510 + }, + { + "epoch": 0.02, + "learning_rate": 3.789999999999999e-05, + "loss": 4.6529, + "step": 1520 + }, + { + "epoch": 0.02, + "learning_rate": 3.815e-05, + "loss": 4.6384, + "step": 1530 + }, + { + "epoch": 0.02, + "learning_rate": 3.84e-05, + "loss": 4.6419, + "step": 1540 + }, + { + "epoch": 0.02, + "learning_rate": 3.865e-05, + "loss": 4.5833, + "step": 1550 + }, + { + "epoch": 0.02, + "learning_rate": 3.8900000000000004e-05, + "loss": 4.5477, + "step": 1560 + }, + { + "epoch": 0.02, + "learning_rate": 3.9149999999999996e-05, + "loss": 4.6188, + "step": 1570 + }, + { + "epoch": 0.02, + "learning_rate": 3.9399999999999995e-05, + "loss": 4.5747, + "step": 1580 + }, + { + "epoch": 0.02, + "learning_rate": 3.9649999999999995e-05, + "loss": 4.4917, + "step": 1590 + }, + { + "epoch": 0.02, + "learning_rate": 3.99e-05, + "loss": 4.4842, + "step": 1600 + }, + { + "epoch": 0.02, + "learning_rate": 4.014999999999999e-05, + "loss": 4.5239, + "step": 1610 + }, + { + "epoch": 0.02, + "learning_rate": 4.039999999999999e-05, + "loss": 4.4849, + "step": 1620 + }, + { + "epoch": 0.02, + "learning_rate": 4.065e-05, + "loss": 4.4855, + "step": 1630 + }, + { + "epoch": 0.02, + "learning_rate": 4.09e-05, + "loss": 4.4695, + "step": 1640 + }, + { + "epoch": 0.02, + "learning_rate": 4.115e-05, + "loss": 4.462, + "step": 1650 + }, + { + "epoch": 0.02, + "learning_rate": 4.14e-05, + "loss": 4.4728, + "step": 1660 + }, + { + "epoch": 0.02, + "learning_rate": 4.1649999999999996e-05, + "loss": 4.4426, + "step": 1670 + }, + { + "epoch": 0.02, + "learning_rate": 4.1899999999999995e-05, + "loss": 4.4123, + "step": 1680 + }, + { + "epoch": 0.02, + "learning_rate": 4.215e-05, + "loss": 4.4026, + "step": 1690 + }, + { + "epoch": 0.02, + "learning_rate": 4.24e-05, + "loss": 4.4942, + "step": 1700 + }, + { + "epoch": 0.02, + "learning_rate": 4.264999999999999e-05, + "loss": 4.4393, + "step": 1710 + }, + { + "epoch": 0.02, + "learning_rate": 4.289999999999999e-05, + "loss": 4.3781, + "step": 1720 + }, + { + "epoch": 0.02, + "learning_rate": 4.315e-05, + "loss": 4.36, + "step": 1730 + }, + { + "epoch": 0.02, + "learning_rate": 4.34e-05, + "loss": 4.4202, + "step": 1740 + }, + { + "epoch": 0.02, + "learning_rate": 4.364999999999999e-05, + "loss": 4.4418, + "step": 1750 + }, + { + "epoch": 0.02, + "learning_rate": 4.3899999999999996e-05, + "loss": 4.3932, + "step": 1760 + }, + { + "epoch": 0.02, + "learning_rate": 4.4149999999999996e-05, + "loss": 4.3306, + "step": 1770 + }, + { + "epoch": 0.02, + "learning_rate": 4.4399999999999995e-05, + "loss": 4.2984, + "step": 1780 + }, + { + "epoch": 0.02, + "learning_rate": 4.465e-05, + "loss": 4.2832, + "step": 1790 + }, + { + "epoch": 0.02, + "learning_rate": 4.49e-05, + "loss": 4.2793, + "step": 1800 + }, + { + "epoch": 0.02, + "learning_rate": 4.514999999999999e-05, + "loss": 4.2818, + "step": 1810 + }, + { + "epoch": 0.02, + "learning_rate": 4.539999999999999e-05, + "loss": 4.2509, + "step": 1820 + }, + { + "epoch": 0.02, + "learning_rate": 4.565e-05, + "loss": 4.2453, + "step": 1830 + }, + { + "epoch": 0.02, + "learning_rate": 4.59e-05, + "loss": 4.1921, + "step": 1840 + }, + { + "epoch": 0.02, + "learning_rate": 4.614999999999999e-05, + "loss": 4.2396, + "step": 1850 + }, + { + "epoch": 0.02, + "learning_rate": 4.6399999999999996e-05, + "loss": 4.2299, + "step": 1860 + }, + { + "epoch": 0.02, + "learning_rate": 4.6649999999999996e-05, + "loss": 4.192, + "step": 1870 + }, + { + "epoch": 0.02, + "learning_rate": 4.6899999999999995e-05, + "loss": 4.2031, + "step": 1880 + }, + { + "epoch": 0.02, + "learning_rate": 4.715e-05, + "loss": 4.2628, + "step": 1890 + }, + { + "epoch": 0.02, + "learning_rate": 4.7399999999999993e-05, + "loss": 4.2162, + "step": 1900 + }, + { + "epoch": 0.02, + "learning_rate": 4.764999999999999e-05, + "loss": 4.1488, + "step": 1910 + }, + { + "epoch": 0.02, + "learning_rate": 4.79e-05, + "loss": 4.1782, + "step": 1920 + }, + { + "epoch": 0.02, + "learning_rate": 4.815e-05, + "loss": 4.164, + "step": 1930 + }, + { + "epoch": 0.02, + "learning_rate": 4.84e-05, + "loss": 4.1491, + "step": 1940 + }, + { + "epoch": 0.02, + "learning_rate": 4.864999999999999e-05, + "loss": 4.1645, + "step": 1950 + }, + { + "epoch": 0.02, + "learning_rate": 4.8899999999999996e-05, + "loss": 4.1327, + "step": 1960 + }, + { + "epoch": 0.02, + "learning_rate": 4.9149999999999995e-05, + "loss": 4.1126, + "step": 1970 + }, + { + "epoch": 0.02, + "learning_rate": 4.9399999999999995e-05, + "loss": 4.0764, + "step": 1980 + }, + { + "epoch": 0.02, + "learning_rate": 4.965e-05, + "loss": 4.1517, + "step": 1990 + }, + { + "epoch": 0.02, + "learning_rate": 4.989999999999999e-05, + "loss": 4.1227, + "step": 2000 + }, + { + "epoch": 0.02, + "learning_rate": 5.014999999999999e-05, + "loss": 4.0753, + "step": 2010 + }, + { + "epoch": 0.02, + "learning_rate": 5.04e-05, + "loss": 4.0627, + "step": 2020 + }, + { + "epoch": 0.02, + "learning_rate": 5.065e-05, + "loss": 4.0252, + "step": 2030 + }, + { + "epoch": 0.02, + "learning_rate": 5.089999999999999e-05, + "loss": 4.0361, + "step": 2040 + }, + { + "epoch": 0.02, + "learning_rate": 5.1149999999999996e-05, + "loss": 4.0053, + "step": 2050 + }, + { + "epoch": 0.02, + "learning_rate": 5.1399999999999996e-05, + "loss": 4.0252, + "step": 2060 + }, + { + "epoch": 0.02, + "learning_rate": 5.1649999999999995e-05, + "loss": 4.001, + "step": 2070 + }, + { + "epoch": 0.02, + "learning_rate": 5.1899999999999994e-05, + "loss": 3.9855, + "step": 2080 + }, + { + "epoch": 0.02, + "learning_rate": 5.215e-05, + "loss": 3.9878, + "step": 2090 + }, + { + "epoch": 0.02, + "learning_rate": 5.239999999999999e-05, + "loss": 3.989, + "step": 2100 + }, + { + "epoch": 0.02, + "learning_rate": 5.264999999999999e-05, + "loss": 3.9635, + "step": 2110 + }, + { + "epoch": 0.02, + "learning_rate": 5.29e-05, + "loss": 4.0035, + "step": 2120 + }, + { + "epoch": 0.02, + "learning_rate": 5.315e-05, + "loss": 3.9413, + "step": 2130 + }, + { + "epoch": 0.02, + "learning_rate": 5.339999999999999e-05, + "loss": 3.8845, + "step": 2140 + }, + { + "epoch": 0.02, + "learning_rate": 5.3649999999999996e-05, + "loss": 3.8886, + "step": 2150 + }, + { + "epoch": 0.02, + "learning_rate": 5.3899999999999996e-05, + "loss": 3.9756, + "step": 2160 + }, + { + "epoch": 0.02, + "learning_rate": 5.4149999999999995e-05, + "loss": 3.9235, + "step": 2170 + }, + { + "epoch": 0.02, + "learning_rate": 5.44e-05, + "loss": 3.9015, + "step": 2180 + }, + { + "epoch": 0.02, + "learning_rate": 5.4649999999999993e-05, + "loss": 3.9052, + "step": 2190 + }, + { + "epoch": 0.02, + "learning_rate": 5.489999999999999e-05, + "loss": 3.9044, + "step": 2200 + }, + { + "epoch": 0.02, + "learning_rate": 5.514999999999999e-05, + "loss": 3.8846, + "step": 2210 + }, + { + "epoch": 0.02, + "learning_rate": 5.54e-05, + "loss": 3.902, + "step": 2220 + }, + { + "epoch": 0.02, + "learning_rate": 5.565e-05, + "loss": 3.8615, + "step": 2230 + }, + { + "epoch": 0.02, + "learning_rate": 5.589999999999999e-05, + "loss": 3.8604, + "step": 2240 + }, + { + "epoch": 0.02, + "learning_rate": 5.6149999999999996e-05, + "loss": 3.8167, + "step": 2250 + }, + { + "epoch": 0.02, + "learning_rate": 5.6399999999999995e-05, + "loss": 3.819, + "step": 2260 + }, + { + "epoch": 0.02, + "learning_rate": 5.6649999999999995e-05, + "loss": 3.7915, + "step": 2270 + }, + { + "epoch": 0.02, + "learning_rate": 5.69e-05, + "loss": 3.7781, + "step": 2280 + }, + { + "epoch": 0.02, + "learning_rate": 5.714999999999999e-05, + "loss": 3.7887, + "step": 2290 + }, + { + "epoch": 0.02, + "learning_rate": 5.739999999999999e-05, + "loss": 3.8007, + "step": 2300 + }, + { + "epoch": 0.02, + "learning_rate": 5.765e-05, + "loss": 3.7612, + "step": 2310 + }, + { + "epoch": 0.02, + "learning_rate": 5.79e-05, + "loss": 3.75, + "step": 2320 + }, + { + "epoch": 0.02, + "learning_rate": 5.814999999999999e-05, + "loss": 3.7403, + "step": 2330 + }, + { + "epoch": 0.02, + "learning_rate": 5.839999999999999e-05, + "loss": 3.7463, + "step": 2340 + }, + { + "epoch": 0.02, + "learning_rate": 5.8649999999999996e-05, + "loss": 3.7643, + "step": 2350 + }, + { + "epoch": 0.02, + "learning_rate": 5.8899999999999995e-05, + "loss": 3.7407, + "step": 2360 + }, + { + "epoch": 0.02, + "learning_rate": 5.9149999999999994e-05, + "loss": 3.7601, + "step": 2370 + }, + { + "epoch": 0.02, + "learning_rate": 5.94e-05, + "loss": 3.6923, + "step": 2380 + }, + { + "epoch": 0.02, + "learning_rate": 5.964999999999999e-05, + "loss": 3.7234, + "step": 2390 + }, + { + "epoch": 0.02, + "learning_rate": 5.989999999999999e-05, + "loss": 3.6752, + "step": 2400 + }, + { + "epoch": 0.02, + "learning_rate": 6.015e-05, + "loss": 3.6437, + "step": 2410 + }, + { + "epoch": 0.02, + "learning_rate": 6.04e-05, + "loss": 3.6805, + "step": 2420 + }, + { + "epoch": 0.02, + "learning_rate": 6.064999999999999e-05, + "loss": 3.7198, + "step": 2430 + }, + { + "epoch": 0.02, + "learning_rate": 6.0899999999999996e-05, + "loss": 3.6588, + "step": 2440 + }, + { + "epoch": 0.02, + "learning_rate": 6.115e-05, + "loss": 3.6113, + "step": 2450 + }, + { + "epoch": 0.02, + "learning_rate": 6.139999999999999e-05, + "loss": 3.6444, + "step": 2460 + }, + { + "epoch": 0.02, + "learning_rate": 6.165e-05, + "loss": 3.6489, + "step": 2470 + }, + { + "epoch": 0.02, + "learning_rate": 6.19e-05, + "loss": 3.5805, + "step": 2480 + }, + { + "epoch": 0.02, + "learning_rate": 6.214999999999999e-05, + "loss": 3.5343, + "step": 2490 + }, + { + "epoch": 0.03, + "learning_rate": 6.239999999999999e-05, + "loss": 3.5642, + "step": 2500 + }, + { + "epoch": 0.03, + "eval_accuracy": 0.4383129848262597, + "eval_loss": 3.9296875, + "eval_runtime": 97.6812, + "eval_samples_per_second": 818.99, + "eval_steps_per_second": 1.607, + "step": 2500 + }, + { + "epoch": 0.03, + "learning_rate": 6.264999999999999e-05, + "loss": 3.5807, + "step": 2510 + }, + { + "epoch": 0.03, + "learning_rate": 6.29e-05, + "loss": 3.5057, + "step": 2520 + }, + { + "epoch": 0.03, + "learning_rate": 6.314999999999999e-05, + "loss": 3.5416, + "step": 2530 + }, + { + "epoch": 0.03, + "learning_rate": 6.34e-05, + "loss": 3.5582, + "step": 2540 + }, + { + "epoch": 0.03, + "learning_rate": 6.365e-05, + "loss": 3.5261, + "step": 2550 + }, + { + "epoch": 0.03, + "learning_rate": 6.39e-05, + "loss": 3.5442, + "step": 2560 + }, + { + "epoch": 0.03, + "learning_rate": 6.414999999999999e-05, + "loss": 3.5676, + "step": 2570 + }, + { + "epoch": 0.03, + "learning_rate": 6.44e-05, + "loss": 3.514, + "step": 2580 + }, + { + "epoch": 0.03, + "learning_rate": 6.465e-05, + "loss": 3.5178, + "step": 2590 + }, + { + "epoch": 0.03, + "learning_rate": 6.489999999999999e-05, + "loss": 3.4848, + "step": 2600 + }, + { + "epoch": 0.03, + "learning_rate": 6.515e-05, + "loss": 3.4749, + "step": 2610 + }, + { + "epoch": 0.03, + "learning_rate": 6.539999999999999e-05, + "loss": 3.4313, + "step": 2620 + }, + { + "epoch": 0.03, + "learning_rate": 6.565e-05, + "loss": 3.473, + "step": 2630 + }, + { + "epoch": 0.03, + "learning_rate": 6.59e-05, + "loss": 3.4574, + "step": 2640 + }, + { + "epoch": 0.03, + "learning_rate": 6.615e-05, + "loss": 3.4329, + "step": 2650 + }, + { + "epoch": 0.03, + "learning_rate": 6.639999999999999e-05, + "loss": 3.4348, + "step": 2660 + }, + { + "epoch": 0.03, + "learning_rate": 6.665e-05, + "loss": 3.4441, + "step": 2670 + }, + { + "epoch": 0.03, + "learning_rate": 6.69e-05, + "loss": 3.4589, + "step": 2680 + }, + { + "epoch": 0.03, + "learning_rate": 6.714999999999999e-05, + "loss": 3.4277, + "step": 2690 + }, + { + "epoch": 0.03, + "learning_rate": 6.739999999999998e-05, + "loss": 3.4026, + "step": 2700 + }, + { + "epoch": 0.03, + "learning_rate": 6.764999999999999e-05, + "loss": 3.3649, + "step": 2710 + }, + { + "epoch": 0.03, + "learning_rate": 6.79e-05, + "loss": 3.356, + "step": 2720 + }, + { + "epoch": 0.03, + "learning_rate": 6.814999999999999e-05, + "loss": 3.3409, + "step": 2730 + }, + { + "epoch": 0.03, + "learning_rate": 6.84e-05, + "loss": 3.3401, + "step": 2740 + }, + { + "epoch": 0.03, + "learning_rate": 6.864999999999999e-05, + "loss": 3.3325, + "step": 2750 + }, + { + "epoch": 0.03, + "learning_rate": 6.89e-05, + "loss": 3.3165, + "step": 2760 + }, + { + "epoch": 0.03, + "learning_rate": 6.915e-05, + "loss": 3.3381, + "step": 2770 + }, + { + "epoch": 0.03, + "learning_rate": 6.939999999999999e-05, + "loss": 3.3134, + "step": 2780 + }, + { + "epoch": 0.03, + "learning_rate": 6.964999999999999e-05, + "loss": 3.3009, + "step": 2790 + }, + { + "epoch": 0.03, + "learning_rate": 6.989999999999999e-05, + "loss": 3.289, + "step": 2800 + }, + { + "epoch": 0.03, + "learning_rate": 7.015e-05, + "loss": 3.2767, + "step": 2810 + }, + { + "epoch": 0.03, + "learning_rate": 7.039999999999999e-05, + "loss": 3.2677, + "step": 2820 + }, + { + "epoch": 0.03, + "learning_rate": 7.065e-05, + "loss": 3.2375, + "step": 2830 + }, + { + "epoch": 0.03, + "learning_rate": 7.09e-05, + "loss": 3.2508, + "step": 2840 + }, + { + "epoch": 0.03, + "learning_rate": 7.115e-05, + "loss": 3.2772, + "step": 2850 + }, + { + "epoch": 0.03, + "learning_rate": 7.139999999999999e-05, + "loss": 3.1986, + "step": 2860 + }, + { + "epoch": 0.03, + "learning_rate": 7.165e-05, + "loss": 3.2352, + "step": 2870 + }, + { + "epoch": 0.03, + "learning_rate": 7.19e-05, + "loss": 3.2138, + "step": 2880 + }, + { + "epoch": 0.03, + "learning_rate": 7.214999999999999e-05, + "loss": 3.207, + "step": 2890 + }, + { + "epoch": 0.03, + "learning_rate": 7.24e-05, + "loss": 3.189, + "step": 2900 + }, + { + "epoch": 0.03, + "learning_rate": 7.264999999999999e-05, + "loss": 3.129, + "step": 2910 + }, + { + "epoch": 0.03, + "learning_rate": 7.29e-05, + "loss": 3.149, + "step": 2920 + }, + { + "epoch": 0.03, + "learning_rate": 7.315e-05, + "loss": 3.167, + "step": 2930 + }, + { + "epoch": 0.03, + "learning_rate": 7.34e-05, + "loss": 3.1163, + "step": 2940 + }, + { + "epoch": 0.03, + "learning_rate": 7.364999999999999e-05, + "loss": 3.1226, + "step": 2950 + }, + { + "epoch": 0.03, + "learning_rate": 7.39e-05, + "loss": 3.08, + "step": 2960 + }, + { + "epoch": 0.03, + "learning_rate": 7.415e-05, + "loss": 3.1136, + "step": 2970 + }, + { + "epoch": 0.03, + "learning_rate": 7.439999999999999e-05, + "loss": 3.0943, + "step": 2980 + }, + { + "epoch": 0.03, + "learning_rate": 7.464999999999998e-05, + "loss": 3.0379, + "step": 2990 + }, + { + "epoch": 0.03, + "learning_rate": 7.489999999999999e-05, + "loss": 3.0346, + "step": 3000 + }, + { + "epoch": 0.03, + "learning_rate": 7.515e-05, + "loss": 3.0365, + "step": 3010 + }, + { + "epoch": 0.03, + "learning_rate": 7.54e-05, + "loss": 3.0473, + "step": 3020 + }, + { + "epoch": 0.03, + "learning_rate": 7.564999999999998e-05, + "loss": 3.0086, + "step": 3030 + }, + { + "epoch": 0.03, + "learning_rate": 7.589999999999999e-05, + "loss": 3.0474, + "step": 3040 + }, + { + "epoch": 0.03, + "learning_rate": 7.615e-05, + "loss": 3.0107, + "step": 3050 + }, + { + "epoch": 0.03, + "learning_rate": 7.639999999999999e-05, + "loss": 2.9755, + "step": 3060 + }, + { + "epoch": 0.03, + "learning_rate": 7.664999999999999e-05, + "loss": 2.9917, + "step": 3070 + }, + { + "epoch": 0.03, + "learning_rate": 7.69e-05, + "loss": 2.952, + "step": 3080 + }, + { + "epoch": 0.03, + "learning_rate": 7.714999999999999e-05, + "loss": 2.9553, + "step": 3090 + }, + { + "epoch": 0.03, + "learning_rate": 7.74e-05, + "loss": 2.9484, + "step": 3100 + }, + { + "epoch": 0.03, + "learning_rate": 7.765e-05, + "loss": 3.0178, + "step": 3110 + }, + { + "epoch": 0.03, + "learning_rate": 7.79e-05, + "loss": 2.9621, + "step": 3120 + }, + { + "epoch": 0.03, + "learning_rate": 7.815e-05, + "loss": 2.9268, + "step": 3130 + }, + { + "epoch": 0.03, + "learning_rate": 7.839999999999998e-05, + "loss": 2.9683, + "step": 3140 + }, + { + "epoch": 0.03, + "learning_rate": 7.864999999999999e-05, + "loss": 2.9026, + "step": 3150 + }, + { + "epoch": 0.03, + "learning_rate": 7.89e-05, + "loss": 2.8892, + "step": 3160 + }, + { + "epoch": 0.03, + "learning_rate": 7.914999999999999e-05, + "loss": 2.8845, + "step": 3170 + }, + { + "epoch": 0.03, + "learning_rate": 7.939999999999999e-05, + "loss": 2.8826, + "step": 3180 + }, + { + "epoch": 0.03, + "learning_rate": 7.965e-05, + "loss": 2.8888, + "step": 3190 + }, + { + "epoch": 0.03, + "learning_rate": 7.989999999999999e-05, + "loss": 2.8068, + "step": 3200 + }, + { + "epoch": 0.03, + "learning_rate": 8.015e-05, + "loss": 2.8483, + "step": 3210 + }, + { + "epoch": 0.03, + "learning_rate": 8.04e-05, + "loss": 2.8422, + "step": 3220 + }, + { + "epoch": 0.03, + "learning_rate": 8.064999999999998e-05, + "loss": 2.8175, + "step": 3230 + }, + { + "epoch": 0.03, + "learning_rate": 8.089999999999999e-05, + "loss": 2.8425, + "step": 3240 + }, + { + "epoch": 0.03, + "learning_rate": 8.115e-05, + "loss": 2.826, + "step": 3250 + }, + { + "epoch": 0.03, + "learning_rate": 8.139999999999999e-05, + "loss": 2.7938, + "step": 3260 + }, + { + "epoch": 0.03, + "learning_rate": 8.164999999999999e-05, + "loss": 2.7908, + "step": 3270 + }, + { + "epoch": 0.03, + "learning_rate": 8.19e-05, + "loss": 2.7734, + "step": 3280 + }, + { + "epoch": 0.03, + "learning_rate": 8.214999999999999e-05, + "loss": 2.8051, + "step": 3290 + }, + { + "epoch": 0.03, + "learning_rate": 8.24e-05, + "loss": 2.7474, + "step": 3300 + }, + { + "epoch": 0.03, + "learning_rate": 8.265e-05, + "loss": 2.7743, + "step": 3310 + }, + { + "epoch": 0.03, + "learning_rate": 8.289999999999998e-05, + "loss": 2.7415, + "step": 3320 + }, + { + "epoch": 0.03, + "learning_rate": 8.314999999999999e-05, + "loss": 2.7359, + "step": 3330 + }, + { + "epoch": 0.03, + "learning_rate": 8.34e-05, + "loss": 2.7306, + "step": 3340 + }, + { + "epoch": 0.03, + "learning_rate": 8.364999999999999e-05, + "loss": 2.7382, + "step": 3350 + }, + { + "epoch": 0.03, + "learning_rate": 8.389999999999999e-05, + "loss": 2.7887, + "step": 3360 + }, + { + "epoch": 0.03, + "learning_rate": 8.415e-05, + "loss": 2.7417, + "step": 3370 + }, + { + "epoch": 0.03, + "learning_rate": 8.439999999999999e-05, + "loss": 2.7036, + "step": 3380 + }, + { + "epoch": 0.03, + "learning_rate": 8.465e-05, + "loss": 2.6724, + "step": 3390 + }, + { + "epoch": 0.03, + "learning_rate": 8.489999999999999e-05, + "loss": 2.6508, + "step": 3400 + }, + { + "epoch": 0.03, + "learning_rate": 8.515e-05, + "loss": 2.6735, + "step": 3410 + }, + { + "epoch": 0.03, + "learning_rate": 8.54e-05, + "loss": 2.652, + "step": 3420 + }, + { + "epoch": 0.03, + "learning_rate": 8.564999999999998e-05, + "loss": 2.6687, + "step": 3430 + }, + { + "epoch": 0.03, + "learning_rate": 8.589999999999999e-05, + "loss": 2.6581, + "step": 3440 + }, + { + "epoch": 0.03, + "learning_rate": 8.615e-05, + "loss": 2.6346, + "step": 3450 + }, + { + "epoch": 0.03, + "learning_rate": 8.639999999999999e-05, + "loss": 2.6429, + "step": 3460 + }, + { + "epoch": 0.03, + "learning_rate": 8.664999999999999e-05, + "loss": 2.6137, + "step": 3470 + }, + { + "epoch": 0.03, + "learning_rate": 8.69e-05, + "loss": 2.6249, + "step": 3480 + }, + { + "epoch": 0.03, + "learning_rate": 8.714999999999999e-05, + "loss": 2.6009, + "step": 3490 + }, + { + "epoch": 0.04, + "learning_rate": 8.74e-05, + "loss": 2.6004, + "step": 3500 + }, + { + "epoch": 0.04, + "learning_rate": 8.765e-05, + "loss": 2.5755, + "step": 3510 + }, + { + "epoch": 0.04, + "learning_rate": 8.789999999999998e-05, + "loss": 2.5508, + "step": 3520 + }, + { + "epoch": 0.04, + "learning_rate": 8.814999999999999e-05, + "loss": 2.5676, + "step": 3530 + }, + { + "epoch": 0.04, + "learning_rate": 8.84e-05, + "loss": 2.5596, + "step": 3540 + }, + { + "epoch": 0.04, + "learning_rate": 8.864999999999999e-05, + "loss": 2.5754, + "step": 3550 + }, + { + "epoch": 0.04, + "learning_rate": 8.889999999999999e-05, + "loss": 2.5696, + "step": 3560 + }, + { + "epoch": 0.04, + "learning_rate": 8.915e-05, + "loss": 2.542, + "step": 3570 + }, + { + "epoch": 0.04, + "learning_rate": 8.939999999999999e-05, + "loss": 2.5561, + "step": 3580 + }, + { + "epoch": 0.04, + "learning_rate": 8.965e-05, + "loss": 2.5059, + "step": 3590 + }, + { + "epoch": 0.04, + "learning_rate": 8.99e-05, + "loss": 2.5288, + "step": 3600 + }, + { + "epoch": 0.04, + "learning_rate": 9.014999999999998e-05, + "loss": 2.5039, + "step": 3610 + }, + { + "epoch": 0.04, + "learning_rate": 9.039999999999999e-05, + "loss": 2.5243, + "step": 3620 + }, + { + "epoch": 0.04, + "learning_rate": 9.064999999999998e-05, + "loss": 2.4772, + "step": 3630 + }, + { + "epoch": 0.04, + "learning_rate": 9.089999999999999e-05, + "loss": 2.4543, + "step": 3640 + }, + { + "epoch": 0.04, + "learning_rate": 9.114999999999999e-05, + "loss": 2.4743, + "step": 3650 + }, + { + "epoch": 0.04, + "learning_rate": 9.139999999999999e-05, + "loss": 2.488, + "step": 3660 + }, + { + "epoch": 0.04, + "learning_rate": 9.164999999999999e-05, + "loss": 2.4918, + "step": 3670 + }, + { + "epoch": 0.04, + "learning_rate": 9.19e-05, + "loss": 2.4866, + "step": 3680 + }, + { + "epoch": 0.04, + "learning_rate": 9.214999999999999e-05, + "loss": 2.4649, + "step": 3690 + }, + { + "epoch": 0.04, + "learning_rate": 9.24e-05, + "loss": 2.4613, + "step": 3700 + }, + { + "epoch": 0.04, + "learning_rate": 9.265e-05, + "loss": 2.4337, + "step": 3710 + }, + { + "epoch": 0.04, + "learning_rate": 9.289999999999998e-05, + "loss": 2.4073, + "step": 3720 + }, + { + "epoch": 0.04, + "learning_rate": 9.314999999999999e-05, + "loss": 2.4314, + "step": 3730 + }, + { + "epoch": 0.04, + "learning_rate": 9.34e-05, + "loss": 2.4311, + "step": 3740 + }, + { + "epoch": 0.04, + "learning_rate": 9.364999999999999e-05, + "loss": 2.4553, + "step": 3750 + }, + { + "epoch": 0.04, + "learning_rate": 9.389999999999999e-05, + "loss": 2.4242, + "step": 3760 + }, + { + "epoch": 0.04, + "learning_rate": 9.415e-05, + "loss": 2.3964, + "step": 3770 + }, + { + "epoch": 0.04, + "learning_rate": 9.439999999999999e-05, + "loss": 2.3865, + "step": 3780 + }, + { + "epoch": 0.04, + "learning_rate": 9.465e-05, + "loss": 2.4134, + "step": 3790 + }, + { + "epoch": 0.04, + "learning_rate": 9.49e-05, + "loss": 2.3987, + "step": 3800 + }, + { + "epoch": 0.04, + "learning_rate": 9.514999999999998e-05, + "loss": 2.3879, + "step": 3810 + }, + { + "epoch": 0.04, + "learning_rate": 9.539999999999999e-05, + "loss": 2.3892, + "step": 3820 + }, + { + "epoch": 0.04, + "learning_rate": 9.565e-05, + "loss": 2.3583, + "step": 3830 + }, + { + "epoch": 0.04, + "learning_rate": 9.589999999999999e-05, + "loss": 2.3354, + "step": 3840 + }, + { + "epoch": 0.04, + "learning_rate": 9.614999999999999e-05, + "loss": 2.3411, + "step": 3850 + }, + { + "epoch": 0.04, + "learning_rate": 9.64e-05, + "loss": 2.3319, + "step": 3860 + }, + { + "epoch": 0.04, + "learning_rate": 9.664999999999999e-05, + "loss": 2.339, + "step": 3870 + }, + { + "epoch": 0.04, + "learning_rate": 9.69e-05, + "loss": 2.3017, + "step": 3880 + }, + { + "epoch": 0.04, + "learning_rate": 9.714999999999999e-05, + "loss": 2.3514, + "step": 3890 + }, + { + "epoch": 0.04, + "learning_rate": 9.74e-05, + "loss": 2.3212, + "step": 3900 + }, + { + "epoch": 0.04, + "learning_rate": 9.764999999999999e-05, + "loss": 2.3247, + "step": 3910 + }, + { + "epoch": 0.04, + "learning_rate": 9.789999999999998e-05, + "loss": 2.3187, + "step": 3920 + }, + { + "epoch": 0.04, + "learning_rate": 9.814999999999999e-05, + "loss": 2.302, + "step": 3930 + }, + { + "epoch": 0.04, + "learning_rate": 9.839999999999999e-05, + "loss": 2.3278, + "step": 3940 + }, + { + "epoch": 0.04, + "learning_rate": 9.864999999999999e-05, + "loss": 2.321, + "step": 3950 + }, + { + "epoch": 0.04, + "learning_rate": 9.889999999999999e-05, + "loss": 2.2785, + "step": 3960 + }, + { + "epoch": 0.04, + "learning_rate": 9.915e-05, + "loss": 2.2715, + "step": 3970 + }, + { + "epoch": 0.04, + "learning_rate": 9.939999999999999e-05, + "loss": 2.2599, + "step": 3980 + }, + { + "epoch": 0.04, + "learning_rate": 9.965e-05, + "loss": 2.2777, + "step": 3990 + }, + { + "epoch": 0.04, + "learning_rate": 9.99e-05, + "loss": 2.2513, + "step": 4000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010014999999999998, + "loss": 2.2255, + "step": 4010 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010039999999999999, + "loss": 2.2582, + "step": 4020 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010065, + "loss": 2.2871, + "step": 4030 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010089999999999999, + "loss": 2.2631, + "step": 4040 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010114999999999999, + "loss": 2.2364, + "step": 4050 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001014, + "loss": 2.2495, + "step": 4060 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010164999999999999, + "loss": 2.218, + "step": 4070 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001019, + "loss": 2.2098, + "step": 4080 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010215, + "loss": 2.2088, + "step": 4090 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010239999999999998, + "loss": 2.1903, + "step": 4100 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010264999999999999, + "loss": 2.2031, + "step": 4110 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001029, + "loss": 2.1972, + "step": 4120 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010314999999999999, + "loss": 2.1614, + "step": 4130 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010339999999999999, + "loss": 2.1714, + "step": 4140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010364999999999999, + "loss": 2.1924, + "step": 4150 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010389999999999999, + "loss": 2.1667, + "step": 4160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010415, + "loss": 2.1696, + "step": 4170 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010439999999999999, + "loss": 2.1879, + "step": 4180 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010465, + "loss": 2.1699, + "step": 4190 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001049, + "loss": 2.1627, + "step": 4200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010514999999999998, + "loss": 2.1355, + "step": 4210 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010539999999999999, + "loss": 2.1718, + "step": 4220 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010564999999999999, + "loss": 2.1131, + "step": 4230 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010589999999999999, + "loss": 2.099, + "step": 4240 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010614999999999999, + "loss": 2.138, + "step": 4250 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001064, + "loss": 2.1346, + "step": 4260 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010664999999999999, + "loss": 2.1164, + "step": 4270 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001069, + "loss": 2.1027, + "step": 4280 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010715, + "loss": 2.1362, + "step": 4290 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010739999999999998, + "loss": 2.1061, + "step": 4300 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010764999999999999, + "loss": 2.097, + "step": 4310 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001079, + "loss": 2.0938, + "step": 4320 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010814999999999999, + "loss": 2.081, + "step": 4330 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010839999999999999, + "loss": 2.0956, + "step": 4340 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010865, + "loss": 2.0812, + "step": 4350 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010889999999999999, + "loss": 2.0977, + "step": 4360 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010915, + "loss": 2.0734, + "step": 4370 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010939999999999998, + "loss": 2.0834, + "step": 4380 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010964999999999998, + "loss": 2.0577, + "step": 4390 + }, + { + "epoch": 0.04, + "learning_rate": 0.00010989999999999999, + "loss": 2.0629, + "step": 4400 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011014999999999998, + "loss": 2.0315, + "step": 4410 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011039999999999999, + "loss": 2.0288, + "step": 4420 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011064999999999999, + "loss": 2.0043, + "step": 4430 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011089999999999999, + "loss": 2.0263, + "step": 4440 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011114999999999999, + "loss": 2.0275, + "step": 4450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001114, + "loss": 2.0508, + "step": 4460 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011164999999999999, + "loss": 2.0482, + "step": 4470 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001119, + "loss": 2.0245, + "step": 4480 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011215, + "loss": 2.0276, + "step": 4490 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011239999999999998, + "loss": 2.0157, + "step": 4500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011264999999999999, + "loss": 2.0158, + "step": 4510 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011289999999999999, + "loss": 1.9956, + "step": 4520 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011314999999999999, + "loss": 2.0053, + "step": 4530 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011339999999999999, + "loss": 1.9749, + "step": 4540 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011365, + "loss": 1.9494, + "step": 4550 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011389999999999999, + "loss": 1.9653, + "step": 4560 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011415, + "loss": 1.9768, + "step": 4570 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001144, + "loss": 1.9863, + "step": 4580 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011464999999999998, + "loss": 1.9441, + "step": 4590 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011489999999999999, + "loss": 1.9764, + "step": 4600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011515, + "loss": 1.9818, + "step": 4610 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011539999999999999, + "loss": 1.9608, + "step": 4620 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011564999999999999, + "loss": 1.9741, + "step": 4630 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011589999999999998, + "loss": 1.9437, + "step": 4640 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011614999999999999, + "loss": 1.9227, + "step": 4650 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001164, + "loss": 1.9289, + "step": 4660 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011664999999999998, + "loss": 1.9195, + "step": 4670 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011689999999999998, + "loss": 1.9182, + "step": 4680 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011714999999999999, + "loss": 1.8692, + "step": 4690 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011739999999999998, + "loss": 1.909, + "step": 4700 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011762499999999999, + "loss": 1.931, + "step": 4710 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011787499999999999, + "loss": 1.8903, + "step": 4720 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011812499999999998, + "loss": 1.9148, + "step": 4730 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011837499999999999, + "loss": 1.9032, + "step": 4740 + }, + { + "epoch": 0.05, + "learning_rate": 0.000118625, + "loss": 1.8977, + "step": 4750 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011887499999999999, + "loss": 1.8749, + "step": 4760 + }, + { + "epoch": 0.05, + "learning_rate": 0.000119125, + "loss": 1.8801, + "step": 4770 + }, + { + "epoch": 0.05, + "learning_rate": 0.00011937499999999999, + "loss": 1.8636, + "step": 4780 + }, + { + "epoch": 0.05, + "learning_rate": 0.000119625, + "loss": 1.8592, + "step": 4790 + }, + { + "epoch": 0.05, + "learning_rate": 0.000119875, + "loss": 1.8598, + "step": 4800 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012012499999999998, + "loss": 1.8642, + "step": 4810 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012037499999999999, + "loss": 1.8885, + "step": 4820 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012062499999999999, + "loss": 1.8736, + "step": 4830 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012087499999999998, + "loss": 1.8383, + "step": 4840 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012112499999999999, + "loss": 1.861, + "step": 4850 + }, + { + "epoch": 0.05, + "learning_rate": 0.000121375, + "loss": 1.8416, + "step": 4860 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012162499999999999, + "loss": 1.8399, + "step": 4870 + }, + { + "epoch": 0.05, + "learning_rate": 0.000121875, + "loss": 1.8497, + "step": 4880 + }, + { + "epoch": 0.05, + "learning_rate": 0.000122125, + "loss": 1.8374, + "step": 4890 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012237499999999998, + "loss": 1.8468, + "step": 4900 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012262499999999999, + "loss": 1.814, + "step": 4910 + }, + { + "epoch": 0.05, + "learning_rate": 0.000122875, + "loss": 1.8219, + "step": 4920 + }, + { + "epoch": 0.05, + "learning_rate": 0.000123125, + "loss": 1.8261, + "step": 4930 + }, + { + "epoch": 0.05, + "learning_rate": 0.000123375, + "loss": 1.8272, + "step": 4940 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012362499999999998, + "loss": 1.822, + "step": 4950 + }, + { + "epoch": 0.05, + "learning_rate": 0.000123875, + "loss": 1.8071, + "step": 4960 + }, + { + "epoch": 0.05, + "learning_rate": 0.000124125, + "loss": 1.825, + "step": 4970 + }, + { + "epoch": 0.05, + "learning_rate": 0.000124375, + "loss": 1.8145, + "step": 4980 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012462499999999998, + "loss": 1.8154, + "step": 4990 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012487499999999999, + "loss": 1.7897, + "step": 5000 + }, + { + "epoch": 0.05, + "eval_accuracy": 0.6654779469588099, + "eval_loss": 2.021484375, + "eval_runtime": 98.4067, + "eval_samples_per_second": 812.953, + "eval_steps_per_second": 1.595, + "step": 5000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000125125, + "loss": 1.7843, + "step": 5010 + }, + { + "epoch": 0.05, + "learning_rate": 0.000125375, + "loss": 1.7975, + "step": 5020 + }, + { + "epoch": 0.05, + "learning_rate": 0.000125625, + "loss": 1.7791, + "step": 5030 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012587499999999998, + "loss": 1.8006, + "step": 5040 + }, + { + "epoch": 0.05, + "learning_rate": 0.000126125, + "loss": 1.7863, + "step": 5050 + }, + { + "epoch": 0.05, + "learning_rate": 0.000126375, + "loss": 1.7956, + "step": 5060 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012662499999999997, + "loss": 1.7842, + "step": 5070 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012687499999999998, + "loss": 1.7654, + "step": 5080 + }, + { + "epoch": 0.05, + "learning_rate": 0.000127125, + "loss": 1.7536, + "step": 5090 + }, + { + "epoch": 0.05, + "learning_rate": 0.000127375, + "loss": 1.7645, + "step": 5100 + }, + { + "epoch": 0.05, + "learning_rate": 0.000127625, + "loss": 1.7458, + "step": 5110 + }, + { + "epoch": 0.05, + "learning_rate": 0.000127875, + "loss": 1.7508, + "step": 5120 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012812499999999998, + "loss": 1.7588, + "step": 5130 + }, + { + "epoch": 0.05, + "learning_rate": 0.000128375, + "loss": 1.7645, + "step": 5140 + }, + { + "epoch": 0.05, + "learning_rate": 0.000128625, + "loss": 1.7593, + "step": 5150 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012887499999999997, + "loss": 1.7307, + "step": 5160 + }, + { + "epoch": 0.05, + "learning_rate": 0.00012912499999999998, + "loss": 1.7578, + "step": 5170 + }, + { + "epoch": 0.05, + "learning_rate": 0.000129375, + "loss": 1.7509, + "step": 5180 + }, + { + "epoch": 0.05, + "learning_rate": 0.000129625, + "loss": 1.7421, + "step": 5190 + }, + { + "epoch": 0.05, + "learning_rate": 0.000129875, + "loss": 1.7378, + "step": 5200 + }, + { + "epoch": 0.05, + "learning_rate": 0.000130125, + "loss": 1.7253, + "step": 5210 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013037499999999998, + "loss": 1.7089, + "step": 5220 + }, + { + "epoch": 0.05, + "learning_rate": 0.000130625, + "loss": 1.6957, + "step": 5230 + }, + { + "epoch": 0.05, + "learning_rate": 0.000130875, + "loss": 1.6984, + "step": 5240 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013112499999999998, + "loss": 1.7067, + "step": 5250 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013137499999999998, + "loss": 1.7153, + "step": 5260 + }, + { + "epoch": 0.05, + "learning_rate": 0.000131625, + "loss": 1.7003, + "step": 5270 + }, + { + "epoch": 0.05, + "learning_rate": 0.000131875, + "loss": 1.7009, + "step": 5280 + }, + { + "epoch": 0.05, + "learning_rate": 0.000132125, + "loss": 1.7018, + "step": 5290 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013237499999999998, + "loss": 1.7049, + "step": 5300 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013262499999999998, + "loss": 1.6905, + "step": 5310 + }, + { + "epoch": 0.05, + "learning_rate": 0.000132875, + "loss": 1.6874, + "step": 5320 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013312499999999997, + "loss": 1.6798, + "step": 5330 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013337499999999998, + "loss": 1.681, + "step": 5340 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013362499999999998, + "loss": 1.6508, + "step": 5350 + }, + { + "epoch": 0.05, + "learning_rate": 0.000133875, + "loss": 1.6663, + "step": 5360 + }, + { + "epoch": 0.05, + "learning_rate": 0.000134125, + "loss": 1.6725, + "step": 5370 + }, + { + "epoch": 0.05, + "learning_rate": 0.000134375, + "loss": 1.6529, + "step": 5380 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013462499999999998, + "loss": 1.6667, + "step": 5390 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013487499999999999, + "loss": 1.6653, + "step": 5400 + }, + { + "epoch": 0.05, + "learning_rate": 0.000135125, + "loss": 1.6569, + "step": 5410 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013537499999999997, + "loss": 1.667, + "step": 5420 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013562499999999998, + "loss": 1.6484, + "step": 5430 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013587499999999998, + "loss": 1.621, + "step": 5440 + }, + { + "epoch": 0.05, + "learning_rate": 0.000136125, + "loss": 1.6436, + "step": 5450 + }, + { + "epoch": 0.05, + "learning_rate": 0.000136375, + "loss": 1.633, + "step": 5460 + }, + { + "epoch": 0.05, + "learning_rate": 0.000136625, + "loss": 1.6295, + "step": 5470 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013687499999999998, + "loss": 1.628, + "step": 5480 + }, + { + "epoch": 0.05, + "learning_rate": 0.00013712499999999999, + "loss": 1.6232, + "step": 5490 + }, + { + "epoch": 0.06, + "learning_rate": 0.000137375, + "loss": 1.6255, + "step": 5500 + }, + { + "epoch": 0.06, + "learning_rate": 0.000137625, + "loss": 1.6432, + "step": 5510 + }, + { + "epoch": 0.06, + "learning_rate": 0.000137875, + "loss": 1.6279, + "step": 5520 + }, + { + "epoch": 0.06, + "learning_rate": 0.00013812499999999998, + "loss": 1.6173, + "step": 5530 + }, + { + "epoch": 0.06, + "learning_rate": 0.000138375, + "loss": 1.614, + "step": 5540 + }, + { + "epoch": 0.06, + "learning_rate": 0.000138625, + "loss": 1.613, + "step": 5550 + }, + { + "epoch": 0.06, + "learning_rate": 0.00013887499999999997, + "loss": 1.6166, + "step": 5560 + }, + { + "epoch": 0.06, + "learning_rate": 0.00013912499999999998, + "loss": 1.6158, + "step": 5570 + }, + { + "epoch": 0.06, + "learning_rate": 0.00013937499999999999, + "loss": 1.6133, + "step": 5580 + }, + { + "epoch": 0.06, + "learning_rate": 0.000139625, + "loss": 1.5946, + "step": 5590 + }, + { + "epoch": 0.06, + "learning_rate": 0.000139875, + "loss": 1.6178, + "step": 5600 + }, + { + "epoch": 0.06, + "learning_rate": 0.000140125, + "loss": 1.6214, + "step": 5610 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014037499999999998, + "loss": 1.6187, + "step": 5620 + }, + { + "epoch": 0.06, + "learning_rate": 0.000140625, + "loss": 1.6119, + "step": 5630 + }, + { + "epoch": 0.06, + "learning_rate": 0.000140875, + "loss": 1.5932, + "step": 5640 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014112499999999997, + "loss": 1.6099, + "step": 5650 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014137499999999998, + "loss": 1.5841, + "step": 5660 + }, + { + "epoch": 0.06, + "learning_rate": 0.000141625, + "loss": 1.5941, + "step": 5670 + }, + { + "epoch": 0.06, + "learning_rate": 0.000141875, + "loss": 1.5714, + "step": 5680 + }, + { + "epoch": 0.06, + "learning_rate": 0.000142125, + "loss": 1.5596, + "step": 5690 + }, + { + "epoch": 0.06, + "learning_rate": 0.000142375, + "loss": 1.5825, + "step": 5700 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001426, + "loss": 1.5837, + "step": 5710 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014285, + "loss": 1.5929, + "step": 5720 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014309999999999998, + "loss": 1.5859, + "step": 5730 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014335, + "loss": 1.5909, + "step": 5740 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001436, + "loss": 1.5944, + "step": 5750 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014384999999999997, + "loss": 1.5592, + "step": 5760 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014409999999999998, + "loss": 1.5771, + "step": 5770 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014434999999999999, + "loss": 1.5739, + "step": 5780 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001446, + "loss": 1.5645, + "step": 5790 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014485, + "loss": 1.5484, + "step": 5800 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001451, + "loss": 1.5657, + "step": 5810 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014534999999999998, + "loss": 1.5617, + "step": 5820 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001456, + "loss": 1.5707, + "step": 5830 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014585, + "loss": 1.5512, + "step": 5840 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014609999999999997, + "loss": 1.5669, + "step": 5850 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014634999999999998, + "loss": 1.5415, + "step": 5860 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014659999999999999, + "loss": 1.5404, + "step": 5870 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014685, + "loss": 1.515, + "step": 5880 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001471, + "loss": 1.5422, + "step": 5890 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014734999999999998, + "loss": 1.496, + "step": 5900 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014759999999999998, + "loss": 1.4912, + "step": 5910 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014785, + "loss": 1.5192, + "step": 5920 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014809999999999997, + "loss": 1.5174, + "step": 5930 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014834999999999997, + "loss": 1.5201, + "step": 5940 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014859999999999998, + "loss": 1.5123, + "step": 5950 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014884999999999999, + "loss": 1.5244, + "step": 5960 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001491, + "loss": 1.5144, + "step": 5970 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014935, + "loss": 1.514, + "step": 5980 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014959999999999998, + "loss": 1.4987, + "step": 5990 + }, + { + "epoch": 0.06, + "learning_rate": 0.00014984999999999998, + "loss": 1.5266, + "step": 6000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001501, + "loss": 1.5074, + "step": 6010 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015034999999999997, + "loss": 1.4996, + "step": 6020 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015059999999999997, + "loss": 1.5075, + "step": 6030 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015084999999999998, + "loss": 1.4872, + "step": 6040 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001511, + "loss": 1.5025, + "step": 6050 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015134999999999997, + "loss": 1.4719, + "step": 6060 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015159999999999997, + "loss": 1.502, + "step": 6070 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015184999999999998, + "loss": 1.5157, + "step": 6080 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015209999999999998, + "loss": 1.4907, + "step": 6090 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015235, + "loss": 1.4747, + "step": 6100 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001526, + "loss": 1.4808, + "step": 6110 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015284999999999997, + "loss": 1.4805, + "step": 6120 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015309999999999998, + "loss": 1.4615, + "step": 6130 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015335, + "loss": 1.4887, + "step": 6140 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001536, + "loss": 1.453, + "step": 6150 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015385, + "loss": 1.463, + "step": 6160 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001541, + "loss": 1.4306, + "step": 6170 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015434999999999998, + "loss": 1.469, + "step": 6180 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001546, + "loss": 1.4942, + "step": 6190 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015485, + "loss": 1.4675, + "step": 6200 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001551, + "loss": 1.4585, + "step": 6210 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015535, + "loss": 1.467, + "step": 6220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015560000000000001, + "loss": 1.4841, + "step": 6230 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015584999999999997, + "loss": 1.4545, + "step": 6240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015609999999999997, + "loss": 1.4503, + "step": 6250 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015634999999999998, + "loss": 1.447, + "step": 6260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015659999999999998, + "loss": 1.4531, + "step": 6270 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015685, + "loss": 1.4416, + "step": 6280 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015709999999999997, + "loss": 1.4706, + "step": 6290 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015734999999999998, + "loss": 1.4469, + "step": 6300 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015759999999999998, + "loss": 1.4523, + "step": 6310 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015785, + "loss": 1.4449, + "step": 6320 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001581, + "loss": 1.4422, + "step": 6330 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015835, + "loss": 1.4359, + "step": 6340 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015859999999999998, + "loss": 1.4429, + "step": 6350 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015884999999999999, + "loss": 1.4271, + "step": 6360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001591, + "loss": 1.4198, + "step": 6370 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015935, + "loss": 1.4261, + "step": 6380 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001596, + "loss": 1.4375, + "step": 6390 + }, + { + "epoch": 0.06, + "learning_rate": 0.00015985, + "loss": 1.4319, + "step": 6400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016009999999999996, + "loss": 1.4446, + "step": 6410 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016034999999999997, + "loss": 1.4272, + "step": 6420 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016059999999999997, + "loss": 1.4311, + "step": 6430 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016084999999999998, + "loss": 1.4489, + "step": 6440 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016109999999999999, + "loss": 1.4271, + "step": 6450 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016135, + "loss": 1.4196, + "step": 6460 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016159999999999997, + "loss": 1.3898, + "step": 6470 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016184999999999998, + "loss": 1.4023, + "step": 6480 + }, + { + "epoch": 0.06, + "learning_rate": 0.00016209999999999998, + "loss": 1.4047, + "step": 6490 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016235, + "loss": 1.3904, + "step": 6500 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001626, + "loss": 1.4102, + "step": 6510 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016284999999999997, + "loss": 1.3983, + "step": 6520 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016309999999999998, + "loss": 1.4131, + "step": 6530 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016334999999999999, + "loss": 1.4131, + "step": 6540 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001636, + "loss": 1.4097, + "step": 6550 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016385, + "loss": 1.3955, + "step": 6560 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001641, + "loss": 1.3888, + "step": 6570 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016434999999999998, + "loss": 1.3908, + "step": 6580 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001646, + "loss": 1.3946, + "step": 6590 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016485, + "loss": 1.3861, + "step": 6600 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016509999999999997, + "loss": 1.389, + "step": 6610 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016534999999999998, + "loss": 1.3817, + "step": 6620 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001656, + "loss": 1.3813, + "step": 6630 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016584999999999997, + "loss": 1.402, + "step": 6640 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016609999999999997, + "loss": 1.3934, + "step": 6650 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016634999999999998, + "loss": 1.3689, + "step": 6660 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016659999999999998, + "loss": 1.3701, + "step": 6670 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016685, + "loss": 1.365, + "step": 6680 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001671, + "loss": 1.3718, + "step": 6690 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016734999999999997, + "loss": 1.3546, + "step": 6700 + }, + { + "epoch": 0.07, + "learning_rate": 0.000167575, + "loss": 1.3649, + "step": 6710 + }, + { + "epoch": 0.07, + "learning_rate": 0.000167825, + "loss": 1.3595, + "step": 6720 + }, + { + "epoch": 0.07, + "learning_rate": 0.000168075, + "loss": 1.3644, + "step": 6730 + }, + { + "epoch": 0.07, + "learning_rate": 0.000168325, + "loss": 1.3749, + "step": 6740 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016857499999999996, + "loss": 1.3693, + "step": 6750 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016882499999999997, + "loss": 1.3468, + "step": 6760 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016907499999999998, + "loss": 1.3447, + "step": 6770 + }, + { + "epoch": 0.07, + "learning_rate": 0.00016932499999999998, + "loss": 1.3418, + "step": 6780 + }, + { + "epoch": 0.07, + "learning_rate": 0.000169575, + "loss": 1.3477, + "step": 6790 + }, + { + "epoch": 0.07, + "learning_rate": 0.000169825, + "loss": 1.3519, + "step": 6800 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017007499999999997, + "loss": 1.3188, + "step": 6810 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017032499999999998, + "loss": 1.3433, + "step": 6820 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017057499999999999, + "loss": 1.3405, + "step": 6830 + }, + { + "epoch": 0.07, + "learning_rate": 0.000170825, + "loss": 1.3375, + "step": 6840 + }, + { + "epoch": 0.07, + "learning_rate": 0.000171075, + "loss": 1.3299, + "step": 6850 + }, + { + "epoch": 0.07, + "learning_rate": 0.000171325, + "loss": 1.3495, + "step": 6860 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017157499999999998, + "loss": 1.3485, + "step": 6870 + }, + { + "epoch": 0.07, + "learning_rate": 0.000171825, + "loss": 1.3383, + "step": 6880 + }, + { + "epoch": 0.07, + "learning_rate": 0.000172075, + "loss": 1.3346, + "step": 6890 + }, + { + "epoch": 0.07, + "learning_rate": 0.000172325, + "loss": 1.3392, + "step": 6900 + }, + { + "epoch": 0.07, + "learning_rate": 0.000172575, + "loss": 1.3336, + "step": 6910 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017282499999999996, + "loss": 1.3228, + "step": 6920 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017307499999999996, + "loss": 1.3324, + "step": 6930 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017332499999999997, + "loss": 1.3237, + "step": 6940 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017357499999999998, + "loss": 1.3331, + "step": 6950 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017382499999999998, + "loss": 1.3495, + "step": 6960 + }, + { + "epoch": 0.07, + "learning_rate": 0.000174075, + "loss": 1.3498, + "step": 6970 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017432499999999997, + "loss": 1.3428, + "step": 6980 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017457499999999997, + "loss": 1.3347, + "step": 6990 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017482499999999998, + "loss": 1.3182, + "step": 7000 + }, + { + "epoch": 0.07, + "learning_rate": 0.000175075, + "loss": 1.3255, + "step": 7010 + }, + { + "epoch": 0.07, + "learning_rate": 0.000175325, + "loss": 1.3057, + "step": 7020 + }, + { + "epoch": 0.07, + "learning_rate": 0.000175575, + "loss": 1.3166, + "step": 7030 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017582499999999998, + "loss": 1.3268, + "step": 7040 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017607499999999998, + "loss": 1.3121, + "step": 7050 + }, + { + "epoch": 0.07, + "learning_rate": 0.000176325, + "loss": 1.301, + "step": 7060 + }, + { + "epoch": 0.07, + "learning_rate": 0.000176575, + "loss": 1.3167, + "step": 7070 + }, + { + "epoch": 0.07, + "learning_rate": 0.000176825, + "loss": 1.3102, + "step": 7080 + }, + { + "epoch": 0.07, + "learning_rate": 0.000177075, + "loss": 1.329, + "step": 7090 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017732499999999996, + "loss": 1.3241, + "step": 7100 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017757499999999997, + "loss": 1.3216, + "step": 7110 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017782499999999997, + "loss": 1.3041, + "step": 7120 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017807499999999998, + "loss": 1.2956, + "step": 7130 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017832499999999998, + "loss": 1.3219, + "step": 7140 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017857499999999996, + "loss": 1.2878, + "step": 7150 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017882499999999997, + "loss": 1.2961, + "step": 7160 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017907499999999998, + "loss": 1.2876, + "step": 7170 + }, + { + "epoch": 0.07, + "learning_rate": 0.00017932499999999998, + "loss": 1.3088, + "step": 7180 + }, + { + "epoch": 0.07, + "learning_rate": 0.000179575, + "loss": 1.296, + "step": 7190 + }, + { + "epoch": 0.07, + "learning_rate": 0.000179825, + "loss": 1.3052, + "step": 7200 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018007499999999997, + "loss": 1.3145, + "step": 7210 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018032499999999998, + "loss": 1.2777, + "step": 7220 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018057499999999998, + "loss": 1.2916, + "step": 7230 + }, + { + "epoch": 0.07, + "learning_rate": 0.000180825, + "loss": 1.2826, + "step": 7240 + }, + { + "epoch": 0.07, + "learning_rate": 0.000181075, + "loss": 1.2731, + "step": 7250 + }, + { + "epoch": 0.07, + "learning_rate": 0.000181325, + "loss": 1.2718, + "step": 7260 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018157499999999998, + "loss": 1.2597, + "step": 7270 + }, + { + "epoch": 0.07, + "learning_rate": 0.000181825, + "loss": 1.2753, + "step": 7280 + }, + { + "epoch": 0.07, + "learning_rate": 0.000182075, + "loss": 1.2755, + "step": 7290 + }, + { + "epoch": 0.07, + "learning_rate": 0.000182325, + "loss": 1.2735, + "step": 7300 + }, + { + "epoch": 0.07, + "learning_rate": 0.000182575, + "loss": 1.269, + "step": 7310 + }, + { + "epoch": 0.07, + "learning_rate": 0.000182825, + "loss": 1.2967, + "step": 7320 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018307499999999996, + "loss": 1.283, + "step": 7330 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018332499999999997, + "loss": 1.2773, + "step": 7340 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018357499999999998, + "loss": 1.2836, + "step": 7350 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018382499999999998, + "loss": 1.272, + "step": 7360 + }, + { + "epoch": 0.07, + "learning_rate": 0.000184075, + "loss": 1.2588, + "step": 7370 + }, + { + "epoch": 0.07, + "learning_rate": 0.000184325, + "loss": 1.2535, + "step": 7380 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018457499999999997, + "loss": 1.2585, + "step": 7390 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018482499999999998, + "loss": 1.2544, + "step": 7400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018507499999999999, + "loss": 1.236, + "step": 7410 + }, + { + "epoch": 0.07, + "learning_rate": 0.000185325, + "loss": 1.26, + "step": 7420 + }, + { + "epoch": 0.07, + "learning_rate": 0.000185575, + "loss": 1.2725, + "step": 7430 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018582499999999998, + "loss": 1.2621, + "step": 7440 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018607499999999998, + "loss": 1.2566, + "step": 7450 + }, + { + "epoch": 0.07, + "learning_rate": 0.000186325, + "loss": 1.2642, + "step": 7460 + }, + { + "epoch": 0.07, + "learning_rate": 0.000186575, + "loss": 1.2521, + "step": 7470 + }, + { + "epoch": 0.07, + "learning_rate": 0.000186825, + "loss": 1.2418, + "step": 7480 + }, + { + "epoch": 0.07, + "learning_rate": 0.000187075, + "loss": 1.2386, + "step": 7490 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018732499999999996, + "loss": 1.2496, + "step": 7500 + }, + { + "epoch": 0.07, + "eval_accuracy": 0.7488450887376084, + "eval_loss": 1.373046875, + "eval_runtime": 97.1795, + "eval_samples_per_second": 823.219, + "eval_steps_per_second": 1.616, + "step": 7500 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018757499999999997, + "loss": 1.2656, + "step": 7510 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018782499999999997, + "loss": 1.2877, + "step": 7520 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018807499999999998, + "loss": 1.2559, + "step": 7530 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018832499999999998, + "loss": 1.289, + "step": 7540 + }, + { + "epoch": 0.08, + "learning_rate": 0.000188575, + "loss": 1.2772, + "step": 7550 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018882499999999997, + "loss": 1.2847, + "step": 7560 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018907499999999997, + "loss": 1.2564, + "step": 7570 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018932499999999998, + "loss": 1.276, + "step": 7580 + }, + { + "epoch": 0.08, + "learning_rate": 0.000189575, + "loss": 1.2677, + "step": 7590 + }, + { + "epoch": 0.08, + "learning_rate": 0.000189825, + "loss": 1.2607, + "step": 7600 + }, + { + "epoch": 0.08, + "learning_rate": 0.000190075, + "loss": 1.2511, + "step": 7610 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019032499999999998, + "loss": 1.2928, + "step": 7620 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019057499999999998, + "loss": 1.2494, + "step": 7630 + }, + { + "epoch": 0.08, + "learning_rate": 0.000190825, + "loss": 1.2637, + "step": 7640 + }, + { + "epoch": 0.08, + "learning_rate": 0.000191075, + "loss": 1.2602, + "step": 7650 + }, + { + "epoch": 0.08, + "learning_rate": 0.000191325, + "loss": 1.2572, + "step": 7660 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019157499999999995, + "loss": 1.2685, + "step": 7670 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019182499999999996, + "loss": 1.2509, + "step": 7680 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019207499999999997, + "loss": 1.2515, + "step": 7690 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019232499999999997, + "loss": 1.2698, + "step": 7700 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019255, + "loss": 1.2357, + "step": 7710 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001928, + "loss": 1.2434, + "step": 7720 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019304999999999998, + "loss": 1.2472, + "step": 7730 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019329999999999998, + "loss": 1.2485, + "step": 7740 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019355, + "loss": 1.2525, + "step": 7750 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001938, + "loss": 1.2505, + "step": 7760 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019405, + "loss": 1.2413, + "step": 7770 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019429999999999998, + "loss": 1.2561, + "step": 7780 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019454999999999999, + "loss": 1.2504, + "step": 7790 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001948, + "loss": 1.2537, + "step": 7800 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019504999999999997, + "loss": 1.2451, + "step": 7810 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019529999999999998, + "loss": 1.2397, + "step": 7820 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019554999999999998, + "loss": 1.2262, + "step": 7830 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019579999999999996, + "loss": 1.2322, + "step": 7840 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019604999999999997, + "loss": 1.2229, + "step": 7850 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019629999999999997, + "loss": 1.2553, + "step": 7860 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019654999999999998, + "loss": 1.2222, + "step": 7870 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019679999999999999, + "loss": 1.2358, + "step": 7880 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019705, + "loss": 1.2192, + "step": 7890 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019729999999999997, + "loss": 1.2226, + "step": 7900 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019754999999999998, + "loss": 1.2095, + "step": 7910 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019779999999999998, + "loss": 1.2142, + "step": 7920 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019805, + "loss": 1.216, + "step": 7930 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001983, + "loss": 1.2335, + "step": 7940 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019855, + "loss": 1.2481, + "step": 7950 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019879999999999998, + "loss": 1.2291, + "step": 7960 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019905, + "loss": 1.2168, + "step": 7970 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001993, + "loss": 1.2517, + "step": 7980 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019955, + "loss": 1.2335, + "step": 7990 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001998, + "loss": 1.2429, + "step": 8000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020004999999999996, + "loss": 1.2454, + "step": 8010 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020029999999999996, + "loss": 1.2418, + "step": 8020 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020054999999999997, + "loss": 1.2321, + "step": 8030 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020079999999999997, + "loss": 1.2369, + "step": 8040 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020104999999999998, + "loss": 1.2323, + "step": 8050 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002013, + "loss": 1.2338, + "step": 8060 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020154999999999997, + "loss": 1.2371, + "step": 8070 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020179999999999997, + "loss": 1.2068, + "step": 8080 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020204999999999998, + "loss": 1.2246, + "step": 8090 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020229999999999998, + "loss": 1.2138, + "step": 8100 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020255, + "loss": 1.2173, + "step": 8110 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002028, + "loss": 1.2031, + "step": 8120 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020304999999999998, + "loss": 1.222, + "step": 8130 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020329999999999998, + "loss": 1.212, + "step": 8140 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020355, + "loss": 1.2242, + "step": 8150 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002038, + "loss": 1.2087, + "step": 8160 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020405, + "loss": 1.216, + "step": 8170 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002043, + "loss": 1.2334, + "step": 8180 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020454999999999996, + "loss": 1.2154, + "step": 8190 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020479999999999996, + "loss": 1.2098, + "step": 8200 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020504999999999997, + "loss": 1.1788, + "step": 8210 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020529999999999998, + "loss": 1.1994, + "step": 8220 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020554999999999998, + "loss": 1.2037, + "step": 8230 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002058, + "loss": 1.2082, + "step": 8240 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020604999999999997, + "loss": 1.2125, + "step": 8250 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020629999999999997, + "loss": 1.1954, + "step": 8260 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020654999999999998, + "loss": 1.2014, + "step": 8270 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020679999999999999, + "loss": 1.2146, + "step": 8280 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020705, + "loss": 1.2013, + "step": 8290 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020729999999999997, + "loss": 1.2174, + "step": 8300 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020754999999999998, + "loss": 1.2043, + "step": 8310 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020779999999999998, + "loss": 1.1967, + "step": 8320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020805, + "loss": 1.1849, + "step": 8330 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002083, + "loss": 1.1729, + "step": 8340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020855, + "loss": 1.1918, + "step": 8350 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020879999999999998, + "loss": 1.1862, + "step": 8360 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020904999999999999, + "loss": 1.1838, + "step": 8370 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002093, + "loss": 1.1633, + "step": 8380 + }, + { + "epoch": 0.08, + "learning_rate": 0.00020955, + "loss": 1.175, + "step": 8390 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002098, + "loss": 1.1767, + "step": 8400 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021004999999999998, + "loss": 1.184, + "step": 8410 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021029999999999996, + "loss": 1.1896, + "step": 8420 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021054999999999997, + "loss": 1.2001, + "step": 8430 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021079999999999997, + "loss": 1.1948, + "step": 8440 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021104999999999998, + "loss": 1.1864, + "step": 8450 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021129999999999999, + "loss": 1.1713, + "step": 8460 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021155, + "loss": 1.1674, + "step": 8470 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021179999999999997, + "loss": 1.1793, + "step": 8480 + }, + { + "epoch": 0.08, + "learning_rate": 0.00021204999999999998, + "loss": 1.1904, + "step": 8490 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021229999999999998, + "loss": 1.1735, + "step": 8500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021255, + "loss": 1.1594, + "step": 8510 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002128, + "loss": 1.1885, + "step": 8520 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021304999999999997, + "loss": 1.1641, + "step": 8530 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021329999999999998, + "loss": 1.1783, + "step": 8540 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021355, + "loss": 1.1782, + "step": 8550 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002138, + "loss": 1.1854, + "step": 8560 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021405, + "loss": 1.1445, + "step": 8570 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002143, + "loss": 1.1713, + "step": 8580 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021454999999999996, + "loss": 1.1643, + "step": 8590 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021479999999999996, + "loss": 1.1834, + "step": 8600 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021504999999999997, + "loss": 1.1736, + "step": 8610 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021529999999999997, + "loss": 1.1749, + "step": 8620 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021554999999999998, + "loss": 1.1803, + "step": 8630 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002158, + "loss": 1.1583, + "step": 8640 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021604999999999997, + "loss": 1.1511, + "step": 8650 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021629999999999997, + "loss": 1.1896, + "step": 8660 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021654999999999998, + "loss": 1.1664, + "step": 8670 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021679999999999998, + "loss": 1.1734, + "step": 8680 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021705, + "loss": 1.1613, + "step": 8690 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002173, + "loss": 1.1422, + "step": 8700 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021752499999999996, + "loss": 1.1381, + "step": 8710 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021777499999999997, + "loss": 1.1172, + "step": 8720 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021802499999999997, + "loss": 1.131, + "step": 8730 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021827499999999998, + "loss": 1.1344, + "step": 8740 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021852499999999999, + "loss": 1.1266, + "step": 8750 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021877499999999996, + "loss": 1.1072, + "step": 8760 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021902499999999997, + "loss": 1.1199, + "step": 8770 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021927499999999998, + "loss": 1.1395, + "step": 8780 + }, + { + "epoch": 0.09, + "learning_rate": 0.00021952499999999998, + "loss": 1.1471, + "step": 8790 + }, + { + "epoch": 0.09, + "learning_rate": 0.000219775, + "loss": 1.1389, + "step": 8800 + }, + { + "epoch": 0.09, + "learning_rate": 0.000220025, + "loss": 1.1376, + "step": 8810 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022027499999999997, + "loss": 1.1415, + "step": 8820 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022052499999999998, + "loss": 1.158, + "step": 8830 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022077499999999999, + "loss": 1.1549, + "step": 8840 + }, + { + "epoch": 0.09, + "learning_rate": 0.000221025, + "loss": 1.1279, + "step": 8850 + }, + { + "epoch": 0.09, + "learning_rate": 0.000221275, + "loss": 1.1354, + "step": 8860 + }, + { + "epoch": 0.09, + "learning_rate": 0.000221525, + "loss": 1.1481, + "step": 8870 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022177499999999996, + "loss": 1.1304, + "step": 8880 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022202499999999996, + "loss": 1.1206, + "step": 8890 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022227499999999997, + "loss": 1.1287, + "step": 8900 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022252499999999997, + "loss": 1.1263, + "step": 8910 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022277499999999998, + "loss": 1.1377, + "step": 8920 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022302499999999996, + "loss": 1.1323, + "step": 8930 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022327499999999997, + "loss": 1.1399, + "step": 8940 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022352499999999997, + "loss": 1.1158, + "step": 8950 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022377499999999998, + "loss": 1.119, + "step": 8960 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022402499999999998, + "loss": 1.1351, + "step": 8970 + }, + { + "epoch": 0.09, + "learning_rate": 0.000224275, + "loss": 1.1323, + "step": 8980 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022452499999999997, + "loss": 1.128, + "step": 8990 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022477499999999997, + "loss": 1.1033, + "step": 9000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022502499999999998, + "loss": 1.1259, + "step": 9010 + }, + { + "epoch": 0.09, + "learning_rate": 0.000225275, + "loss": 1.1012, + "step": 9020 + }, + { + "epoch": 0.09, + "learning_rate": 0.000225525, + "loss": 1.1288, + "step": 9030 + }, + { + "epoch": 0.09, + "learning_rate": 0.000225775, + "loss": 1.1358, + "step": 9040 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022602499999999998, + "loss": 1.1197, + "step": 9050 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022627499999999998, + "loss": 1.1229, + "step": 9060 + }, + { + "epoch": 0.09, + "learning_rate": 0.000226525, + "loss": 1.1221, + "step": 9070 + }, + { + "epoch": 0.09, + "learning_rate": 0.000226775, + "loss": 1.1059, + "step": 9080 + }, + { + "epoch": 0.09, + "learning_rate": 0.000227025, + "loss": 1.1354, + "step": 9090 + }, + { + "epoch": 0.09, + "learning_rate": 0.000227275, + "loss": 1.1306, + "step": 9100 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022752499999999996, + "loss": 1.1264, + "step": 9110 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022777499999999997, + "loss": 1.1259, + "step": 9120 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022802499999999997, + "loss": 1.1077, + "step": 9130 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022827499999999998, + "loss": 1.1097, + "step": 9140 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022852499999999998, + "loss": 1.1277, + "step": 9150 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022877499999999996, + "loss": 1.1337, + "step": 9160 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022902499999999997, + "loss": 1.1278, + "step": 9170 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022927499999999998, + "loss": 1.1213, + "step": 9180 + }, + { + "epoch": 0.09, + "learning_rate": 0.00022952499999999998, + "loss": 1.1104, + "step": 9190 + }, + { + "epoch": 0.09, + "learning_rate": 0.000229775, + "loss": 1.0953, + "step": 9200 + }, + { + "epoch": 0.09, + "learning_rate": 0.000230025, + "loss": 1.1256, + "step": 9210 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023027499999999997, + "loss": 1.1117, + "step": 9220 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023052499999999998, + "loss": 1.1406, + "step": 9230 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023077499999999999, + "loss": 1.1138, + "step": 9240 + }, + { + "epoch": 0.09, + "learning_rate": 0.000231025, + "loss": 1.1336, + "step": 9250 + }, + { + "epoch": 0.09, + "learning_rate": 0.000231275, + "loss": 1.1208, + "step": 9260 + }, + { + "epoch": 0.09, + "learning_rate": 0.000231525, + "loss": 1.1263, + "step": 9270 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023177499999999996, + "loss": 1.1306, + "step": 9280 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023202499999999996, + "loss": 1.1261, + "step": 9290 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023227499999999997, + "loss": 1.1191, + "step": 9300 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023252499999999997, + "loss": 1.1023, + "step": 9310 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023277499999999998, + "loss": 1.0853, + "step": 9320 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023302499999999999, + "loss": 1.1104, + "step": 9330 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023327499999999996, + "loss": 1.1077, + "step": 9340 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023352499999999997, + "loss": 1.1262, + "step": 9350 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023377499999999998, + "loss": 1.125, + "step": 9360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023402499999999998, + "loss": 1.1072, + "step": 9370 + }, + { + "epoch": 0.09, + "learning_rate": 0.000234275, + "loss": 1.11, + "step": 9380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023452499999999997, + "loss": 1.0846, + "step": 9390 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023477499999999997, + "loss": 1.0895, + "step": 9400 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023502499999999998, + "loss": 1.0858, + "step": 9410 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023527499999999999, + "loss": 1.0886, + "step": 9420 + }, + { + "epoch": 0.09, + "learning_rate": 0.000235525, + "loss": 1.0858, + "step": 9430 + }, + { + "epoch": 0.09, + "learning_rate": 0.000235775, + "loss": 1.0684, + "step": 9440 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023602499999999998, + "loss": 1.0701, + "step": 9450 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023627499999999998, + "loss": 1.0703, + "step": 9460 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023652499999999996, + "loss": 1.0928, + "step": 9470 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023677499999999997, + "loss": 1.0823, + "step": 9480 + }, + { + "epoch": 0.09, + "learning_rate": 0.00023702499999999997, + "loss": 1.0958, + "step": 9490 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023727499999999998, + "loss": 1.0629, + "step": 9500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023752499999999996, + "loss": 1.0654, + "step": 9510 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023777499999999997, + "loss": 1.063, + "step": 9520 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023802499999999997, + "loss": 1.0727, + "step": 9530 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023827499999999998, + "loss": 1.0725, + "step": 9540 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023852499999999998, + "loss": 1.0759, + "step": 9550 + }, + { + "epoch": 0.1, + "learning_rate": 0.000238775, + "loss": 1.0493, + "step": 9560 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023902499999999997, + "loss": 1.0511, + "step": 9570 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023927499999999997, + "loss": 1.0333, + "step": 9580 + }, + { + "epoch": 0.1, + "learning_rate": 0.00023952499999999998, + "loss": 1.0718, + "step": 9590 + }, + { + "epoch": 0.1, + "learning_rate": 0.000239775, + "loss": 1.0631, + "step": 9600 + }, + { + "epoch": 0.1, + "learning_rate": 0.000240025, + "loss": 1.0662, + "step": 9610 + }, + { + "epoch": 0.1, + "learning_rate": 0.000240275, + "loss": 1.0533, + "step": 9620 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024052499999999998, + "loss": 1.0558, + "step": 9630 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024077499999999998, + "loss": 1.0694, + "step": 9640 + }, + { + "epoch": 0.1, + "learning_rate": 0.000241025, + "loss": 1.1064, + "step": 9650 + }, + { + "epoch": 0.1, + "learning_rate": 0.000241275, + "loss": 1.0828, + "step": 9660 + }, + { + "epoch": 0.1, + "learning_rate": 0.000241525, + "loss": 1.0639, + "step": 9670 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024177499999999995, + "loss": 1.0705, + "step": 9680 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024202499999999996, + "loss": 1.0866, + "step": 9690 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024227499999999997, + "loss": 1.0813, + "step": 9700 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024252499999999997, + "loss": 1.081, + "step": 9710 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024275, + "loss": 1.103, + "step": 9720 + }, + { + "epoch": 0.1, + "learning_rate": 0.000243, + "loss": 1.0906, + "step": 9730 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024324999999999998, + "loss": 1.0728, + "step": 9740 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024349999999999998, + "loss": 1.0843, + "step": 9750 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024375, + "loss": 1.0672, + "step": 9760 + }, + { + "epoch": 0.1, + "learning_rate": 0.000244, + "loss": 1.0846, + "step": 9770 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024425, + "loss": 1.0973, + "step": 9780 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002445, + "loss": 1.0933, + "step": 9790 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024474999999999996, + "loss": 1.0767, + "step": 9800 + }, + { + "epoch": 0.1, + "learning_rate": 0.000245, + "loss": 1.0875, + "step": 9810 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024524999999999997, + "loss": 1.0775, + "step": 9820 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002455, + "loss": 1.0833, + "step": 9830 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024575, + "loss": 1.0829, + "step": 9840 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024599999999999996, + "loss": 1.0804, + "step": 9850 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024625, + "loss": 1.0755, + "step": 9860 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002465, + "loss": 1.0638, + "step": 9870 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024675, + "loss": 1.0426, + "step": 9880 + }, + { + "epoch": 0.1, + "learning_rate": 0.000247, + "loss": 1.0552, + "step": 9890 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024724999999999997, + "loss": 1.0444, + "step": 9900 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024749999999999994, + "loss": 1.07, + "step": 9910 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024775, + "loss": 1.078, + "step": 9920 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024799999999999996, + "loss": 1.0591, + "step": 9930 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024825, + "loss": 1.0609, + "step": 9940 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024849999999999997, + "loss": 1.0778, + "step": 9950 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024875, + "loss": 1.0898, + "step": 9960 + }, + { + "epoch": 0.1, + "learning_rate": 0.000249, + "loss": 1.0609, + "step": 9970 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024924999999999996, + "loss": 1.0747, + "step": 9980 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002495, + "loss": 1.0724, + "step": 9990 + }, + { + "epoch": 0.1, + "learning_rate": 0.00024974999999999997, + "loss": 1.0477, + "step": 10000 + }, + { + "epoch": 0.1, + "eval_accuracy": 0.7842036631094624, + "eval_loss": 1.115234375, + "eval_runtime": 96.9888, + "eval_samples_per_second": 824.838, + "eval_steps_per_second": 1.619, + "step": 10000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025, + "loss": 1.0264, + "step": 10010 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025025, + "loss": 1.0462, + "step": 10020 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025049999999999996, + "loss": 1.0428, + "step": 10030 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025075, + "loss": 1.0627, + "step": 10040 + }, + { + "epoch": 0.1, + "learning_rate": 0.000251, + "loss": 1.049, + "step": 10050 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025125, + "loss": 1.0465, + "step": 10060 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002515, + "loss": 1.0318, + "step": 10070 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025174999999999997, + "loss": 1.0378, + "step": 10080 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025199999999999995, + "loss": 1.0406, + "step": 10090 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025225, + "loss": 1.0562, + "step": 10100 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025249999999999996, + "loss": 1.0432, + "step": 10110 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025275, + "loss": 1.0308, + "step": 10120 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025299999999999997, + "loss": 1.0178, + "step": 10130 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025324999999999995, + "loss": 1.0565, + "step": 10140 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002535, + "loss": 1.0464, + "step": 10150 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025374999999999996, + "loss": 1.0578, + "step": 10160 + }, + { + "epoch": 0.1, + "learning_rate": 0.000254, + "loss": 1.0467, + "step": 10170 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025425, + "loss": 1.0362, + "step": 10180 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002545, + "loss": 1.0415, + "step": 10190 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025475, + "loss": 1.0466, + "step": 10200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025499999999999996, + "loss": 1.0281, + "step": 10210 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025525, + "loss": 1.0507, + "step": 10220 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002555, + "loss": 1.0262, + "step": 10230 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025575, + "loss": 1.0221, + "step": 10240 + }, + { + "epoch": 0.1, + "learning_rate": 0.000256, + "loss": 1.0043, + "step": 10250 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025624999999999997, + "loss": 1.0257, + "step": 10260 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025649999999999995, + "loss": 1.0277, + "step": 10270 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025675, + "loss": 1.0356, + "step": 10280 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025699999999999996, + "loss": 1.0249, + "step": 10290 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025725, + "loss": 1.0259, + "step": 10300 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025749999999999997, + "loss": 0.9973, + "step": 10310 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025774999999999995, + "loss": 1.0064, + "step": 10320 + }, + { + "epoch": 0.1, + "learning_rate": 0.000258, + "loss": 1.0049, + "step": 10330 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025824999999999996, + "loss": 1.0566, + "step": 10340 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002585, + "loss": 1.0659, + "step": 10350 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025875, + "loss": 1.0536, + "step": 10360 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025899999999999995, + "loss": 1.0511, + "step": 10370 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025925, + "loss": 1.0522, + "step": 10380 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025949999999999997, + "loss": 1.0431, + "step": 10390 + }, + { + "epoch": 0.1, + "learning_rate": 0.00025975, + "loss": 1.0517, + "step": 10400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00026, + "loss": 1.0646, + "step": 10410 + }, + { + "epoch": 0.1, + "learning_rate": 0.00026025, + "loss": 1.0405, + "step": 10420 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002605, + "loss": 1.0417, + "step": 10430 + }, + { + "epoch": 0.1, + "learning_rate": 0.00026074999999999997, + "loss": 1.0508, + "step": 10440 + }, + { + "epoch": 0.1, + "learning_rate": 0.000261, + "loss": 1.0434, + "step": 10450 + }, + { + "epoch": 0.1, + "learning_rate": 0.00026125, + "loss": 1.0584, + "step": 10460 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002615, + "loss": 1.069, + "step": 10470 + }, + { + "epoch": 0.1, + "learning_rate": 0.00026175, + "loss": 1.0468, + "step": 10480 + }, + { + "epoch": 0.1, + "learning_rate": 0.00026199999999999997, + "loss": 1.0446, + "step": 10490 + }, + { + "epoch": 0.1, + "learning_rate": 0.00026224999999999995, + "loss": 1.0502, + "step": 10500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002625, + "loss": 1.0355, + "step": 10510 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026274999999999996, + "loss": 1.0702, + "step": 10520 + }, + { + "epoch": 0.11, + "learning_rate": 0.000263, + "loss": 1.0659, + "step": 10530 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026325, + "loss": 1.0434, + "step": 10540 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026349999999999995, + "loss": 1.0538, + "step": 10550 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026375, + "loss": 1.0319, + "step": 10560 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026399999999999997, + "loss": 1.0367, + "step": 10570 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026425, + "loss": 1.0461, + "step": 10580 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002645, + "loss": 1.0409, + "step": 10590 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026474999999999996, + "loss": 1.0372, + "step": 10600 + }, + { + "epoch": 0.11, + "learning_rate": 0.000265, + "loss": 1.048, + "step": 10610 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026524999999999997, + "loss": 1.0323, + "step": 10620 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002655, + "loss": 1.0342, + "step": 10630 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026575, + "loss": 1.0125, + "step": 10640 + }, + { + "epoch": 0.11, + "learning_rate": 0.000266, + "loss": 1.0315, + "step": 10650 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026624999999999994, + "loss": 1.0279, + "step": 10660 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026649999999999997, + "loss": 1.0302, + "step": 10670 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026674999999999995, + "loss": 1.0211, + "step": 10680 + }, + { + "epoch": 0.11, + "learning_rate": 0.000267, + "loss": 1.0223, + "step": 10690 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026724999999999996, + "loss": 1.0169, + "step": 10700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002675, + "loss": 1.036, + "step": 10710 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026772499999999996, + "loss": 1.0318, + "step": 10720 + }, + { + "epoch": 0.11, + "learning_rate": 0.000267975, + "loss": 1.0206, + "step": 10730 + }, + { + "epoch": 0.11, + "learning_rate": 0.000268225, + "loss": 0.999, + "step": 10740 + }, + { + "epoch": 0.11, + "learning_rate": 0.000268475, + "loss": 0.9801, + "step": 10750 + }, + { + "epoch": 0.11, + "learning_rate": 0.000268725, + "loss": 0.9834, + "step": 10760 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026897499999999996, + "loss": 1.0261, + "step": 10770 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026922499999999994, + "loss": 1.0063, + "step": 10780 + }, + { + "epoch": 0.11, + "learning_rate": 0.000269475, + "loss": 1.0153, + "step": 10790 + }, + { + "epoch": 0.11, + "learning_rate": 0.00026972499999999996, + "loss": 0.9967, + "step": 10800 + }, + { + "epoch": 0.11, + "learning_rate": 0.000269975, + "loss": 0.9976, + "step": 10810 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027022499999999997, + "loss": 1.0015, + "step": 10820 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027047499999999995, + "loss": 1.0215, + "step": 10830 + }, + { + "epoch": 0.11, + "learning_rate": 0.000270725, + "loss": 1.0095, + "step": 10840 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027097499999999996, + "loss": 1.0341, + "step": 10850 + }, + { + "epoch": 0.11, + "learning_rate": 0.000271225, + "loss": 1.0016, + "step": 10860 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027147499999999997, + "loss": 1.0226, + "step": 10870 + }, + { + "epoch": 0.11, + "learning_rate": 0.000271725, + "loss": 0.9989, + "step": 10880 + }, + { + "epoch": 0.11, + "learning_rate": 0.000271975, + "loss": 1.006, + "step": 10890 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027222499999999996, + "loss": 0.9861, + "step": 10900 + }, + { + "epoch": 0.11, + "learning_rate": 0.000272475, + "loss": 1.0227, + "step": 10910 + }, + { + "epoch": 0.11, + "learning_rate": 0.000272725, + "loss": 0.9842, + "step": 10920 + }, + { + "epoch": 0.11, + "learning_rate": 0.000272975, + "loss": 0.9882, + "step": 10930 + }, + { + "epoch": 0.11, + "learning_rate": 0.000273225, + "loss": 0.9849, + "step": 10940 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027347499999999997, + "loss": 1.003, + "step": 10950 + }, + { + "epoch": 0.11, + "learning_rate": 0.000273725, + "loss": 1.0014, + "step": 10960 + }, + { + "epoch": 0.11, + "learning_rate": 0.000273975, + "loss": 1.0029, + "step": 10970 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027422499999999996, + "loss": 1.0124, + "step": 10980 + }, + { + "epoch": 0.11, + "learning_rate": 0.000274475, + "loss": 1.0175, + "step": 10990 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027472499999999997, + "loss": 0.9947, + "step": 11000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027497499999999995, + "loss": 0.9935, + "step": 11010 + }, + { + "epoch": 0.11, + "learning_rate": 0.000275225, + "loss": 1.0061, + "step": 11020 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027547499999999996, + "loss": 1.0129, + "step": 11030 + }, + { + "epoch": 0.11, + "learning_rate": 0.000275725, + "loss": 1.0046, + "step": 11040 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027597499999999997, + "loss": 0.9996, + "step": 11050 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027622499999999995, + "loss": 0.9892, + "step": 11060 + }, + { + "epoch": 0.11, + "learning_rate": 0.000276475, + "loss": 0.9932, + "step": 11070 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027672499999999996, + "loss": 1.021, + "step": 11080 + }, + { + "epoch": 0.11, + "learning_rate": 0.000276975, + "loss": 1.0211, + "step": 11090 + }, + { + "epoch": 0.11, + "learning_rate": 0.000277225, + "loss": 1.0129, + "step": 11100 + }, + { + "epoch": 0.11, + "learning_rate": 0.000277475, + "loss": 0.9703, + "step": 11110 + }, + { + "epoch": 0.11, + "learning_rate": 0.000277725, + "loss": 0.9861, + "step": 11120 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027797499999999997, + "loss": 0.9778, + "step": 11130 + }, + { + "epoch": 0.11, + "learning_rate": 0.000278225, + "loss": 0.9914, + "step": 11140 + }, + { + "epoch": 0.11, + "learning_rate": 0.000278475, + "loss": 0.9898, + "step": 11150 + }, + { + "epoch": 0.11, + "learning_rate": 0.000278725, + "loss": 1.0094, + "step": 11160 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027897499999999994, + "loss": 0.9753, + "step": 11170 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027922499999999997, + "loss": 0.9997, + "step": 11180 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027947499999999995, + "loss": 0.9668, + "step": 11190 + }, + { + "epoch": 0.11, + "learning_rate": 0.000279725, + "loss": 1.0121, + "step": 11200 + }, + { + "epoch": 0.11, + "learning_rate": 0.00027997499999999996, + "loss": 1.0003, + "step": 11210 + }, + { + "epoch": 0.11, + "learning_rate": 0.000280225, + "loss": 1.0057, + "step": 11220 + }, + { + "epoch": 0.11, + "learning_rate": 0.000280475, + "loss": 1.0012, + "step": 11230 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028072499999999995, + "loss": 0.9905, + "step": 11240 + }, + { + "epoch": 0.11, + "learning_rate": 0.000280975, + "loss": 0.9816, + "step": 11250 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028122499999999996, + "loss": 1.0025, + "step": 11260 + }, + { + "epoch": 0.11, + "learning_rate": 0.000281475, + "loss": 0.9853, + "step": 11270 + }, + { + "epoch": 0.11, + "learning_rate": 0.000281725, + "loss": 0.9964, + "step": 11280 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028197499999999996, + "loss": 0.9941, + "step": 11290 + }, + { + "epoch": 0.11, + "learning_rate": 0.000282225, + "loss": 0.9888, + "step": 11300 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028247499999999997, + "loss": 0.9934, + "step": 11310 + }, + { + "epoch": 0.11, + "learning_rate": 0.000282725, + "loss": 1.0022, + "step": 11320 + }, + { + "epoch": 0.11, + "learning_rate": 0.000282975, + "loss": 0.9831, + "step": 11330 + }, + { + "epoch": 0.11, + "learning_rate": 0.000283225, + "loss": 1.0045, + "step": 11340 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028347499999999994, + "loss": 0.9923, + "step": 11350 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028372499999999997, + "loss": 1.0049, + "step": 11360 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028397499999999995, + "loss": 0.9778, + "step": 11370 + }, + { + "epoch": 0.11, + "learning_rate": 0.000284225, + "loss": 0.9896, + "step": 11380 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028447499999999996, + "loss": 0.9914, + "step": 11390 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028472499999999994, + "loss": 1.005, + "step": 11400 + }, + { + "epoch": 0.11, + "learning_rate": 0.000284975, + "loss": 1.0014, + "step": 11410 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028522499999999995, + "loss": 1.0103, + "step": 11420 + }, + { + "epoch": 0.11, + "learning_rate": 0.000285475, + "loss": 0.9762, + "step": 11430 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028572499999999997, + "loss": 1.002, + "step": 11440 + }, + { + "epoch": 0.11, + "learning_rate": 0.000285975, + "loss": 0.9845, + "step": 11450 + }, + { + "epoch": 0.11, + "learning_rate": 0.000286225, + "loss": 0.9944, + "step": 11460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028647499999999996, + "loss": 0.9813, + "step": 11470 + }, + { + "epoch": 0.11, + "learning_rate": 0.000286725, + "loss": 0.9881, + "step": 11480 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028697499999999997, + "loss": 0.9478, + "step": 11490 + }, + { + "epoch": 0.12, + "learning_rate": 0.000287225, + "loss": 0.976, + "step": 11500 + }, + { + "epoch": 0.12, + "learning_rate": 0.000287475, + "loss": 0.9837, + "step": 11510 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028772499999999996, + "loss": 1.0044, + "step": 11520 + }, + { + "epoch": 0.12, + "learning_rate": 0.000287975, + "loss": 0.9704, + "step": 11530 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028822499999999997, + "loss": 0.9819, + "step": 11540 + }, + { + "epoch": 0.12, + "learning_rate": 0.000288475, + "loss": 0.9807, + "step": 11550 + }, + { + "epoch": 0.12, + "learning_rate": 0.000288725, + "loss": 0.9934, + "step": 11560 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028897499999999996, + "loss": 0.9772, + "step": 11570 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028922499999999994, + "loss": 0.9869, + "step": 11580 + }, + { + "epoch": 0.12, + "learning_rate": 0.000289475, + "loss": 1.0183, + "step": 11590 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028972499999999995, + "loss": 1.0031, + "step": 11600 + }, + { + "epoch": 0.12, + "learning_rate": 0.000289975, + "loss": 1.005, + "step": 11610 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029022499999999997, + "loss": 0.9948, + "step": 11620 + }, + { + "epoch": 0.12, + "learning_rate": 0.000290475, + "loss": 1.0125, + "step": 11630 + }, + { + "epoch": 0.12, + "learning_rate": 0.000290725, + "loss": 1.0066, + "step": 11640 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029097499999999996, + "loss": 1.0183, + "step": 11650 + }, + { + "epoch": 0.12, + "learning_rate": 0.000291225, + "loss": 1.0012, + "step": 11660 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029147499999999997, + "loss": 0.9977, + "step": 11670 + }, + { + "epoch": 0.12, + "learning_rate": 0.000291725, + "loss": 0.991, + "step": 11680 + }, + { + "epoch": 0.12, + "learning_rate": 0.000291975, + "loss": 0.996, + "step": 11690 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029222499999999996, + "loss": 1.0012, + "step": 11700 + }, + { + "epoch": 0.12, + "learning_rate": 0.000292475, + "loss": 1.0169, + "step": 11710 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029269999999999996, + "loss": 1.0089, + "step": 11720 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029295, + "loss": 0.9878, + "step": 11730 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029319999999999997, + "loss": 0.9738, + "step": 11740 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029344999999999995, + "loss": 1.0032, + "step": 11750 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002937, + "loss": 0.994, + "step": 11760 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029394999999999996, + "loss": 1.005, + "step": 11770 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002942, + "loss": 0.9683, + "step": 11780 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029445, + "loss": 0.983, + "step": 11790 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029469999999999995, + "loss": 0.9743, + "step": 11800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029495, + "loss": 0.9866, + "step": 11810 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029519999999999997, + "loss": 0.9976, + "step": 11820 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029545, + "loss": 1.0009, + "step": 11830 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002957, + "loss": 1.0055, + "step": 11840 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029595, + "loss": 0.9906, + "step": 11850 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029619999999999994, + "loss": 0.9845, + "step": 11860 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029644999999999997, + "loss": 0.9647, + "step": 11870 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029669999999999995, + "loss": 1.0115, + "step": 11880 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029695, + "loss": 1.0021, + "step": 11890 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029719999999999996, + "loss": 1.0008, + "step": 11900 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029745, + "loss": 0.9766, + "step": 11910 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029769999999999997, + "loss": 0.966, + "step": 11920 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029794999999999995, + "loss": 0.991, + "step": 11930 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002982, + "loss": 0.9696, + "step": 11940 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029844999999999996, + "loss": 0.9896, + "step": 11950 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002987, + "loss": 0.9878, + "step": 11960 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029895, + "loss": 0.9877, + "step": 11970 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029919999999999995, + "loss": 0.9899, + "step": 11980 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029945, + "loss": 0.9921, + "step": 11990 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029969999999999997, + "loss": 0.9874, + "step": 12000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00029995, + "loss": 1.0079, + "step": 12010 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003002, + "loss": 0.9987, + "step": 12020 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030045, + "loss": 0.9862, + "step": 12030 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030069999999999994, + "loss": 0.9898, + "step": 12040 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030095, + "loss": 0.9914, + "step": 12050 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030119999999999995, + "loss": 0.9903, + "step": 12060 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030144999999999993, + "loss": 0.9978, + "step": 12070 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030169999999999996, + "loss": 0.9808, + "step": 12080 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030194999999999994, + "loss": 1.0049, + "step": 12090 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003022, + "loss": 0.9987, + "step": 12100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030244999999999995, + "loss": 0.9894, + "step": 12110 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030269999999999993, + "loss": 0.9732, + "step": 12120 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030294999999999996, + "loss": 0.9937, + "step": 12130 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030319999999999994, + "loss": 0.994, + "step": 12140 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030345, + "loss": 1.0007, + "step": 12150 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030369999999999996, + "loss": 0.989, + "step": 12160 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030395, + "loss": 0.9978, + "step": 12170 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030419999999999997, + "loss": 0.993, + "step": 12180 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030444999999999995, + "loss": 0.9995, + "step": 12190 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003047, + "loss": 0.9951, + "step": 12200 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030494999999999996, + "loss": 0.9982, + "step": 12210 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003052, + "loss": 1.0111, + "step": 12220 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030544999999999997, + "loss": 0.9741, + "step": 12230 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030569999999999995, + "loss": 0.9882, + "step": 12240 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030595, + "loss": 0.9801, + "step": 12250 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030619999999999996, + "loss": 0.9705, + "step": 12260 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030645, + "loss": 0.9911, + "step": 12270 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003067, + "loss": 0.9778, + "step": 12280 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030694999999999995, + "loss": 0.9778, + "step": 12290 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003072, + "loss": 0.9886, + "step": 12300 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030744999999999997, + "loss": 0.9936, + "step": 12310 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003077, + "loss": 0.9837, + "step": 12320 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030795, + "loss": 0.9883, + "step": 12330 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003082, + "loss": 0.9718, + "step": 12340 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030845, + "loss": 0.9745, + "step": 12350 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030869999999999997, + "loss": 0.9696, + "step": 12360 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030895, + "loss": 0.9832, + "step": 12370 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003092, + "loss": 0.9874, + "step": 12380 + }, + { + "epoch": 0.12, + "learning_rate": 0.00030945, + "loss": 0.9961, + "step": 12390 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003097, + "loss": 0.9864, + "step": 12400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003099499999999999, + "loss": 0.967, + "step": 12410 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003102, + "loss": 0.9945, + "step": 12420 + }, + { + "epoch": 0.12, + "learning_rate": 0.00031044999999999993, + "loss": 0.9778, + "step": 12430 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003107, + "loss": 0.9881, + "step": 12440 + }, + { + "epoch": 0.12, + "learning_rate": 0.00031094999999999994, + "loss": 0.9843, + "step": 12450 + }, + { + "epoch": 0.12, + "learning_rate": 0.00031120000000000003, + "loss": 0.9742, + "step": 12460 + }, + { + "epoch": 0.12, + "learning_rate": 0.00031144999999999995, + "loss": 0.9726, + "step": 12470 + }, + { + "epoch": 0.12, + "learning_rate": 0.00031169999999999993, + "loss": 0.9565, + "step": 12480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00031194999999999997, + "loss": 0.9647, + "step": 12490 + }, + { + "epoch": 0.12, + "learning_rate": 0.00031219999999999995, + "loss": 0.9817, + "step": 12500 + }, + { + "epoch": 0.12, + "eval_accuracy": 0.7950684202995898, + "eval_loss": 1.0478515625, + "eval_runtime": 97.1269, + "eval_samples_per_second": 823.665, + "eval_steps_per_second": 1.616, + "step": 12500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031245, + "loss": 0.9518, + "step": 12510 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031269999999999996, + "loss": 0.9495, + "step": 12520 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031294999999999994, + "loss": 0.9507, + "step": 12530 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031319999999999997, + "loss": 0.9361, + "step": 12540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031344999999999995, + "loss": 0.944, + "step": 12550 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003137, + "loss": 0.9476, + "step": 12560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031394999999999996, + "loss": 0.9425, + "step": 12570 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031419999999999994, + "loss": 0.9681, + "step": 12580 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031444999999999997, + "loss": 0.9618, + "step": 12590 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031469999999999995, + "loss": 0.9591, + "step": 12600 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031495, + "loss": 0.9782, + "step": 12610 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031519999999999996, + "loss": 0.9584, + "step": 12620 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031545, + "loss": 0.9911, + "step": 12630 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003157, + "loss": 0.9893, + "step": 12640 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031594999999999995, + "loss": 0.9754, + "step": 12650 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003162, + "loss": 0.9664, + "step": 12660 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031644999999999997, + "loss": 0.9677, + "step": 12670 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003167, + "loss": 0.9835, + "step": 12680 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031695, + "loss": 0.9739, + "step": 12690 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031719999999999996, + "loss": 0.9776, + "step": 12700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031745, + "loss": 0.9723, + "step": 12710 + }, + { + "epoch": 0.13, + "learning_rate": 0.000317675, + "loss": 0.9647, + "step": 12720 + }, + { + "epoch": 0.13, + "learning_rate": 0.000317925, + "loss": 0.9681, + "step": 12730 + }, + { + "epoch": 0.13, + "learning_rate": 0.000318175, + "loss": 0.9747, + "step": 12740 + }, + { + "epoch": 0.13, + "learning_rate": 0.000318425, + "loss": 0.9683, + "step": 12750 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003186749999999999, + "loss": 0.9854, + "step": 12760 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031892499999999996, + "loss": 0.9822, + "step": 12770 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031917499999999994, + "loss": 0.9719, + "step": 12780 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031942499999999997, + "loss": 0.9675, + "step": 12790 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031967499999999995, + "loss": 0.9678, + "step": 12800 + }, + { + "epoch": 0.13, + "learning_rate": 0.00031992499999999993, + "loss": 0.9665, + "step": 12810 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032017499999999996, + "loss": 0.9547, + "step": 12820 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032042499999999994, + "loss": 0.9615, + "step": 12830 + }, + { + "epoch": 0.13, + "learning_rate": 0.000320675, + "loss": 0.9436, + "step": 12840 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032092499999999995, + "loss": 0.985, + "step": 12850 + }, + { + "epoch": 0.13, + "learning_rate": 0.000321175, + "loss": 0.9761, + "step": 12860 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032142499999999997, + "loss": 0.9892, + "step": 12870 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032167499999999994, + "loss": 0.9831, + "step": 12880 + }, + { + "epoch": 0.13, + "learning_rate": 0.000321925, + "loss": 0.978, + "step": 12890 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032217499999999996, + "loss": 0.9744, + "step": 12900 + }, + { + "epoch": 0.13, + "learning_rate": 0.000322425, + "loss": 0.9768, + "step": 12910 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032267499999999997, + "loss": 0.9772, + "step": 12920 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032292499999999995, + "loss": 0.9737, + "step": 12930 + }, + { + "epoch": 0.13, + "learning_rate": 0.000323175, + "loss": 0.9604, + "step": 12940 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032342499999999996, + "loss": 0.9649, + "step": 12950 + }, + { + "epoch": 0.13, + "learning_rate": 0.000323675, + "loss": 0.9837, + "step": 12960 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032392499999999997, + "loss": 0.9745, + "step": 12970 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032417499999999995, + "loss": 0.9844, + "step": 12980 + }, + { + "epoch": 0.13, + "learning_rate": 0.000324425, + "loss": 0.9779, + "step": 12990 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032467499999999996, + "loss": 0.9783, + "step": 13000 + }, + { + "epoch": 0.13, + "learning_rate": 0.000324925, + "loss": 0.9821, + "step": 13010 + }, + { + "epoch": 0.13, + "learning_rate": 0.000325175, + "loss": 0.9589, + "step": 13020 + }, + { + "epoch": 0.13, + "learning_rate": 0.000325425, + "loss": 0.977, + "step": 13030 + }, + { + "epoch": 0.13, + "learning_rate": 0.000325675, + "loss": 0.9504, + "step": 13040 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032592499999999997, + "loss": 0.9621, + "step": 13050 + }, + { + "epoch": 0.13, + "learning_rate": 0.000326175, + "loss": 0.9669, + "step": 13060 + }, + { + "epoch": 0.13, + "learning_rate": 0.000326425, + "loss": 0.9712, + "step": 13070 + }, + { + "epoch": 0.13, + "learning_rate": 0.000326675, + "loss": 0.9772, + "step": 13080 + }, + { + "epoch": 0.13, + "learning_rate": 0.000326925, + "loss": 0.969, + "step": 13090 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003271749999999999, + "loss": 0.9666, + "step": 13100 + }, + { + "epoch": 0.13, + "learning_rate": 0.000327425, + "loss": 0.9537, + "step": 13110 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032767499999999993, + "loss": 0.9664, + "step": 13120 + }, + { + "epoch": 0.13, + "learning_rate": 0.000327925, + "loss": 0.9789, + "step": 13130 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032817499999999994, + "loss": 0.9689, + "step": 13140 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003284249999999999, + "loss": 0.9689, + "step": 13150 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032867499999999995, + "loss": 0.9678, + "step": 13160 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032892499999999993, + "loss": 0.968, + "step": 13170 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032917499999999996, + "loss": 0.952, + "step": 13180 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032942499999999994, + "loss": 0.9559, + "step": 13190 + }, + { + "epoch": 0.13, + "learning_rate": 0.000329675, + "loss": 0.963, + "step": 13200 + }, + { + "epoch": 0.13, + "learning_rate": 0.00032992499999999996, + "loss": 0.9352, + "step": 13210 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033017499999999993, + "loss": 0.9508, + "step": 13220 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033042499999999997, + "loss": 0.9283, + "step": 13230 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033067499999999995, + "loss": 0.9382, + "step": 13240 + }, + { + "epoch": 0.13, + "learning_rate": 0.000330925, + "loss": 0.944, + "step": 13250 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033117499999999996, + "loss": 0.951, + "step": 13260 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033142499999999994, + "loss": 0.9331, + "step": 13270 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033167499999999997, + "loss": 0.9377, + "step": 13280 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033192499999999995, + "loss": 0.934, + "step": 13290 + }, + { + "epoch": 0.13, + "learning_rate": 0.000332175, + "loss": 0.9317, + "step": 13300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033242499999999996, + "loss": 0.929, + "step": 13310 + }, + { + "epoch": 0.13, + "learning_rate": 0.000332675, + "loss": 0.9474, + "step": 13320 + }, + { + "epoch": 0.13, + "learning_rate": 0.000332925, + "loss": 0.9454, + "step": 13330 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033317499999999995, + "loss": 0.9484, + "step": 13340 + }, + { + "epoch": 0.13, + "learning_rate": 0.000333425, + "loss": 0.9346, + "step": 13350 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033367499999999997, + "loss": 0.9379, + "step": 13360 + }, + { + "epoch": 0.13, + "learning_rate": 0.000333925, + "loss": 0.93, + "step": 13370 + }, + { + "epoch": 0.13, + "learning_rate": 0.000334175, + "loss": 0.9487, + "step": 13380 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033442499999999996, + "loss": 0.9495, + "step": 13390 + }, + { + "epoch": 0.13, + "learning_rate": 0.000334675, + "loss": 0.9382, + "step": 13400 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033492499999999997, + "loss": 0.9315, + "step": 13410 + }, + { + "epoch": 0.13, + "learning_rate": 0.000335175, + "loss": 0.9374, + "step": 13420 + }, + { + "epoch": 0.13, + "learning_rate": 0.000335425, + "loss": 0.9448, + "step": 13430 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033567499999999996, + "loss": 0.9493, + "step": 13440 + }, + { + "epoch": 0.13, + "learning_rate": 0.000335925, + "loss": 0.9409, + "step": 13450 + }, + { + "epoch": 0.13, + "learning_rate": 0.00033617499999999997, + "loss": 0.9346, + "step": 13460 + }, + { + "epoch": 0.13, + "learning_rate": 0.000336425, + "loss": 0.9601, + "step": 13470 + }, + { + "epoch": 0.13, + "learning_rate": 0.000336675, + "loss": 0.9639, + "step": 13480 + }, + { + "epoch": 0.13, + "learning_rate": 0.000336925, + "loss": 0.9514, + "step": 13490 + }, + { + "epoch": 0.14, + "learning_rate": 0.000337175, + "loss": 0.974, + "step": 13500 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003374249999999999, + "loss": 0.9714, + "step": 13510 + }, + { + "epoch": 0.14, + "learning_rate": 0.000337675, + "loss": 0.9557, + "step": 13520 + }, + { + "epoch": 0.14, + "learning_rate": 0.00033792499999999993, + "loss": 0.9642, + "step": 13530 + }, + { + "epoch": 0.14, + "learning_rate": 0.000338175, + "loss": 0.957, + "step": 13540 + }, + { + "epoch": 0.14, + "learning_rate": 0.00033842499999999995, + "loss": 0.9521, + "step": 13550 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003386749999999999, + "loss": 0.9619, + "step": 13560 + }, + { + "epoch": 0.14, + "learning_rate": 0.00033892499999999996, + "loss": 0.9743, + "step": 13570 + }, + { + "epoch": 0.14, + "learning_rate": 0.00033917499999999994, + "loss": 0.9548, + "step": 13580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00033942499999999997, + "loss": 0.9571, + "step": 13590 + }, + { + "epoch": 0.14, + "learning_rate": 0.00033967499999999995, + "loss": 0.9536, + "step": 13600 + }, + { + "epoch": 0.14, + "learning_rate": 0.000339925, + "loss": 0.9645, + "step": 13610 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034017499999999996, + "loss": 0.9431, + "step": 13620 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034042499999999994, + "loss": 0.9596, + "step": 13630 + }, + { + "epoch": 0.14, + "learning_rate": 0.000340675, + "loss": 0.9659, + "step": 13640 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034092499999999995, + "loss": 0.9651, + "step": 13650 + }, + { + "epoch": 0.14, + "learning_rate": 0.000341175, + "loss": 0.9674, + "step": 13660 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034142499999999996, + "loss": 0.9548, + "step": 13670 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034167499999999994, + "loss": 0.9649, + "step": 13680 + }, + { + "epoch": 0.14, + "learning_rate": 0.000341925, + "loss": 0.9571, + "step": 13690 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034217499999999996, + "loss": 0.9592, + "step": 13700 + }, + { + "epoch": 0.14, + "learning_rate": 0.000342425, + "loss": 0.9577, + "step": 13710 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034265, + "loss": 0.9515, + "step": 13720 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003429, + "loss": 0.9422, + "step": 13730 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034314999999999997, + "loss": 0.9448, + "step": 13740 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003434, + "loss": 0.9529, + "step": 13750 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034365, + "loss": 0.9589, + "step": 13760 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003439, + "loss": 0.9651, + "step": 13770 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034415, + "loss": 0.9682, + "step": 13780 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034439999999999997, + "loss": 0.942, + "step": 13790 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034465, + "loss": 0.9512, + "step": 13800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003449, + "loss": 0.9686, + "step": 13810 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034515, + "loss": 0.9516, + "step": 13820 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034539999999999994, + "loss": 0.9463, + "step": 13830 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003456499999999999, + "loss": 0.943, + "step": 13840 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034589999999999995, + "loss": 0.9457, + "step": 13850 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034614999999999993, + "loss": 0.9424, + "step": 13860 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034639999999999996, + "loss": 0.9498, + "step": 13870 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034664999999999994, + "loss": 0.9644, + "step": 13880 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003469, + "loss": 0.9507, + "step": 13890 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034714999999999995, + "loss": 0.9474, + "step": 13900 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034739999999999993, + "loss": 0.9442, + "step": 13910 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034764999999999997, + "loss": 0.9538, + "step": 13920 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034789999999999995, + "loss": 0.9647, + "step": 13930 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034815, + "loss": 0.9311, + "step": 13940 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034839999999999996, + "loss": 0.9468, + "step": 13950 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034864999999999994, + "loss": 0.9446, + "step": 13960 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034889999999999997, + "loss": 0.9407, + "step": 13970 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034914999999999995, + "loss": 0.928, + "step": 13980 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003494, + "loss": 0.9465, + "step": 13990 + }, + { + "epoch": 0.14, + "learning_rate": 0.00034964999999999996, + "loss": 0.9496, + "step": 14000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003499, + "loss": 0.9511, + "step": 14010 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035015, + "loss": 0.9594, + "step": 14020 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035039999999999995, + "loss": 0.9473, + "step": 14030 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035065, + "loss": 0.9374, + "step": 14040 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035089999999999996, + "loss": 0.9394, + "step": 14050 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035115, + "loss": 0.9547, + "step": 14060 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003514, + "loss": 0.9557, + "step": 14070 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035164999999999996, + "loss": 0.9297, + "step": 14080 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003519, + "loss": 0.9475, + "step": 14090 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035214999999999997, + "loss": 0.9497, + "step": 14100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003524, + "loss": 0.9467, + "step": 14110 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035265, + "loss": 0.9632, + "step": 14120 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035289999999999996, + "loss": 0.9579, + "step": 14130 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035315, + "loss": 0.9547, + "step": 14140 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035339999999999997, + "loss": 0.941, + "step": 14150 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035365, + "loss": 0.9354, + "step": 14160 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003539, + "loss": 0.9401, + "step": 14170 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035415, + "loss": 0.9526, + "step": 14180 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003544, + "loss": 0.9453, + "step": 14190 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003546499999999999, + "loss": 0.9356, + "step": 14200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003549, + "loss": 0.9381, + "step": 14210 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035514999999999993, + "loss": 0.944, + "step": 14220 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003554, + "loss": 0.9448, + "step": 14230 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035564999999999994, + "loss": 0.9566, + "step": 14240 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003558999999999999, + "loss": 0.9356, + "step": 14250 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035614999999999996, + "loss": 0.9481, + "step": 14260 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035639999999999994, + "loss": 0.9392, + "step": 14270 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035664999999999997, + "loss": 0.9335, + "step": 14280 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035689999999999995, + "loss": 0.9419, + "step": 14290 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003571499999999999, + "loss": 0.9458, + "step": 14300 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035739999999999996, + "loss": 0.949, + "step": 14310 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035764999999999994, + "loss": 0.9428, + "step": 14320 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035789999999999997, + "loss": 0.9429, + "step": 14330 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035814999999999995, + "loss": 0.9408, + "step": 14340 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003584, + "loss": 0.94, + "step": 14350 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035864999999999996, + "loss": 0.9442, + "step": 14360 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035889999999999994, + "loss": 0.9475, + "step": 14370 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035915, + "loss": 0.939, + "step": 14380 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035939999999999995, + "loss": 0.9331, + "step": 14390 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035965, + "loss": 0.9342, + "step": 14400 + }, + { + "epoch": 0.14, + "learning_rate": 0.00035989999999999997, + "loss": 0.9157, + "step": 14410 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036014999999999995, + "loss": 0.9288, + "step": 14420 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003604, + "loss": 0.9442, + "step": 14430 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036064999999999996, + "loss": 0.9333, + "step": 14440 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003609, + "loss": 0.9477, + "step": 14450 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036114999999999997, + "loss": 0.9564, + "step": 14460 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003614, + "loss": 0.9514, + "step": 14470 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036165, + "loss": 0.9478, + "step": 14480 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036189999999999996, + "loss": 0.9499, + "step": 14490 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036215, + "loss": 0.9363, + "step": 14500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036239999999999997, + "loss": 0.9307, + "step": 14510 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036265, + "loss": 0.933, + "step": 14520 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003629, + "loss": 0.9388, + "step": 14530 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036314999999999996, + "loss": 0.9317, + "step": 14540 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003634, + "loss": 0.9299, + "step": 14550 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036365, + "loss": 0.9437, + "step": 14560 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003639, + "loss": 0.9454, + "step": 14570 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036415, + "loss": 0.9416, + "step": 14580 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003643999999999999, + "loss": 0.9183, + "step": 14590 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036465, + "loss": 0.9327, + "step": 14600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003648999999999999, + "loss": 0.922, + "step": 14610 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036515, + "loss": 0.9334, + "step": 14620 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036539999999999994, + "loss": 0.9428, + "step": 14630 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036565, + "loss": 0.9437, + "step": 14640 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036589999999999995, + "loss": 0.9216, + "step": 14650 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036614999999999993, + "loss": 0.9294, + "step": 14660 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036639999999999996, + "loss": 0.9188, + "step": 14670 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036664999999999994, + "loss": 0.9329, + "step": 14680 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003669, + "loss": 0.9372, + "step": 14690 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036714999999999995, + "loss": 0.9343, + "step": 14700 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036739999999999993, + "loss": 0.9351, + "step": 14710 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036764999999999996, + "loss": 0.9063, + "step": 14720 + }, + { + "epoch": 0.15, + "learning_rate": 0.000367875, + "loss": 0.9074, + "step": 14730 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036812499999999996, + "loss": 0.9242, + "step": 14740 + }, + { + "epoch": 0.15, + "learning_rate": 0.000368375, + "loss": 0.9145, + "step": 14750 + }, + { + "epoch": 0.15, + "learning_rate": 0.000368625, + "loss": 0.9093, + "step": 14760 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036887499999999995, + "loss": 0.9172, + "step": 14770 + }, + { + "epoch": 0.15, + "learning_rate": 0.000369125, + "loss": 0.9219, + "step": 14780 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036937499999999997, + "loss": 0.9111, + "step": 14790 + }, + { + "epoch": 0.15, + "learning_rate": 0.000369625, + "loss": 0.9313, + "step": 14800 + }, + { + "epoch": 0.15, + "learning_rate": 0.000369875, + "loss": 0.9135, + "step": 14810 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037012499999999996, + "loss": 0.931, + "step": 14820 + }, + { + "epoch": 0.15, + "learning_rate": 0.000370375, + "loss": 0.9043, + "step": 14830 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037062499999999997, + "loss": 0.9024, + "step": 14840 + }, + { + "epoch": 0.15, + "learning_rate": 0.000370875, + "loss": 0.9054, + "step": 14850 + }, + { + "epoch": 0.15, + "learning_rate": 0.000371125, + "loss": 0.9101, + "step": 14860 + }, + { + "epoch": 0.15, + "learning_rate": 0.000371375, + "loss": 0.9087, + "step": 14870 + }, + { + "epoch": 0.15, + "learning_rate": 0.000371625, + "loss": 0.9163, + "step": 14880 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003718749999999999, + "loss": 0.9187, + "step": 14890 + }, + { + "epoch": 0.15, + "learning_rate": 0.000372125, + "loss": 0.9218, + "step": 14900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037237499999999993, + "loss": 0.9167, + "step": 14910 + }, + { + "epoch": 0.15, + "learning_rate": 0.000372625, + "loss": 0.9154, + "step": 14920 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037287499999999994, + "loss": 0.9129, + "step": 14930 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003731249999999999, + "loss": 0.9134, + "step": 14940 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037337499999999995, + "loss": 0.914, + "step": 14950 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037362499999999993, + "loss": 0.9167, + "step": 14960 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037387499999999997, + "loss": 0.9252, + "step": 14970 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037412499999999995, + "loss": 0.9329, + "step": 14980 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003743749999999999, + "loss": 0.949, + "step": 14990 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037462499999999996, + "loss": 0.9463, + "step": 15000 + }, + { + "epoch": 0.15, + "eval_accuracy": 0.811620901381853, + "eval_loss": 0.94189453125, + "eval_runtime": 97.6826, + "eval_samples_per_second": 818.979, + "eval_steps_per_second": 1.607, + "step": 15000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037487499999999994, + "loss": 0.9369, + "step": 15010 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037512499999999997, + "loss": 0.9289, + "step": 15020 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037537499999999995, + "loss": 0.9494, + "step": 15030 + }, + { + "epoch": 0.15, + "learning_rate": 0.000375625, + "loss": 0.9298, + "step": 15040 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037587499999999996, + "loss": 0.9263, + "step": 15050 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037612499999999994, + "loss": 0.9053, + "step": 15060 + }, + { + "epoch": 0.15, + "learning_rate": 0.000376375, + "loss": 0.8997, + "step": 15070 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037662499999999995, + "loss": 0.8866, + "step": 15080 + }, + { + "epoch": 0.15, + "learning_rate": 0.000376875, + "loss": 0.9064, + "step": 15090 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037712499999999996, + "loss": 0.9229, + "step": 15100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037737499999999994, + "loss": 0.928, + "step": 15110 + }, + { + "epoch": 0.15, + "learning_rate": 0.000377625, + "loss": 0.9462, + "step": 15120 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037787499999999996, + "loss": 0.9331, + "step": 15130 + }, + { + "epoch": 0.15, + "learning_rate": 0.000378125, + "loss": 0.9253, + "step": 15140 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037837499999999997, + "loss": 0.9153, + "step": 15150 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037862499999999995, + "loss": 0.93, + "step": 15160 + }, + { + "epoch": 0.15, + "learning_rate": 0.000378875, + "loss": 0.9232, + "step": 15170 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037912499999999996, + "loss": 0.9408, + "step": 15180 + }, + { + "epoch": 0.15, + "learning_rate": 0.000379375, + "loss": 0.9373, + "step": 15190 + }, + { + "epoch": 0.15, + "learning_rate": 0.00037962499999999997, + "loss": 0.9201, + "step": 15200 + }, + { + "epoch": 0.15, + "learning_rate": 0.000379875, + "loss": 0.9188, + "step": 15210 + }, + { + "epoch": 0.15, + "learning_rate": 0.000380125, + "loss": 0.9333, + "step": 15220 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038037499999999996, + "loss": 0.9171, + "step": 15230 + }, + { + "epoch": 0.15, + "learning_rate": 0.000380625, + "loss": 0.924, + "step": 15240 + }, + { + "epoch": 0.15, + "learning_rate": 0.000380875, + "loss": 0.938, + "step": 15250 + }, + { + "epoch": 0.15, + "learning_rate": 0.000381125, + "loss": 0.9111, + "step": 15260 + }, + { + "epoch": 0.15, + "learning_rate": 0.000381375, + "loss": 0.9052, + "step": 15270 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003816249999999999, + "loss": 0.9045, + "step": 15280 + }, + { + "epoch": 0.15, + "learning_rate": 0.000381875, + "loss": 0.9103, + "step": 15290 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003821249999999999, + "loss": 0.8965, + "step": 15300 + }, + { + "epoch": 0.15, + "learning_rate": 0.000382375, + "loss": 0.9055, + "step": 15310 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038262499999999994, + "loss": 0.885, + "step": 15320 + }, + { + "epoch": 0.15, + "learning_rate": 0.000382875, + "loss": 0.8948, + "step": 15330 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038312499999999995, + "loss": 0.8965, + "step": 15340 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003833749999999999, + "loss": 0.909, + "step": 15350 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038362499999999996, + "loss": 0.9016, + "step": 15360 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038387499999999994, + "loss": 0.8971, + "step": 15370 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038412499999999997, + "loss": 0.8991, + "step": 15380 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038437499999999995, + "loss": 0.9154, + "step": 15390 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038462499999999993, + "loss": 0.8933, + "step": 15400 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038487499999999996, + "loss": 0.9045, + "step": 15410 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038512499999999994, + "loss": 0.9009, + "step": 15420 + }, + { + "epoch": 0.15, + "learning_rate": 0.000385375, + "loss": 0.9093, + "step": 15430 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038562499999999995, + "loss": 0.8835, + "step": 15440 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038587499999999993, + "loss": 0.9153, + "step": 15450 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038612499999999997, + "loss": 0.888, + "step": 15460 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038637499999999995, + "loss": 0.8992, + "step": 15470 + }, + { + "epoch": 0.15, + "learning_rate": 0.000386625, + "loss": 0.9111, + "step": 15480 + }, + { + "epoch": 0.15, + "learning_rate": 0.00038687499999999996, + "loss": 0.9072, + "step": 15490 + }, + { + "epoch": 0.15, + "learning_rate": 0.000387125, + "loss": 0.8989, + "step": 15500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038737499999999997, + "loss": 0.9184, + "step": 15510 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038762499999999995, + "loss": 0.9062, + "step": 15520 + }, + { + "epoch": 0.16, + "learning_rate": 0.000387875, + "loss": 0.8942, + "step": 15530 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038812499999999996, + "loss": 0.8903, + "step": 15540 + }, + { + "epoch": 0.16, + "learning_rate": 0.000388375, + "loss": 0.9002, + "step": 15550 + }, + { + "epoch": 0.16, + "learning_rate": 0.000388625, + "loss": 0.8949, + "step": 15560 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038887499999999995, + "loss": 0.9059, + "step": 15570 + }, + { + "epoch": 0.16, + "learning_rate": 0.000389125, + "loss": 0.8925, + "step": 15580 + }, + { + "epoch": 0.16, + "learning_rate": 0.00038937499999999996, + "loss": 0.9097, + "step": 15590 + }, + { + "epoch": 0.16, + "learning_rate": 0.000389625, + "loss": 0.902, + "step": 15600 + }, + { + "epoch": 0.16, + "learning_rate": 0.000389875, + "loss": 0.9065, + "step": 15610 + }, + { + "epoch": 0.16, + "learning_rate": 0.000390125, + "loss": 0.9019, + "step": 15620 + }, + { + "epoch": 0.16, + "learning_rate": 0.000390375, + "loss": 0.9129, + "step": 15630 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039062499999999997, + "loss": 0.9301, + "step": 15640 + }, + { + "epoch": 0.16, + "learning_rate": 0.000390875, + "loss": 0.9264, + "step": 15650 + }, + { + "epoch": 0.16, + "learning_rate": 0.000391125, + "loss": 0.9174, + "step": 15660 + }, + { + "epoch": 0.16, + "learning_rate": 0.000391375, + "loss": 0.9326, + "step": 15670 + }, + { + "epoch": 0.16, + "learning_rate": 0.000391625, + "loss": 0.9315, + "step": 15680 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003918749999999999, + "loss": 0.9348, + "step": 15690 + }, + { + "epoch": 0.16, + "learning_rate": 0.000392125, + "loss": 0.9141, + "step": 15700 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039237499999999993, + "loss": 0.9214, + "step": 15710 + }, + { + "epoch": 0.16, + "learning_rate": 0.000392625, + "loss": 0.9206, + "step": 15720 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039285, + "loss": 0.9257, + "step": 15730 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039309999999999996, + "loss": 0.9082, + "step": 15740 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039334999999999994, + "loss": 0.9235, + "step": 15750 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039359999999999997, + "loss": 0.9195, + "step": 15760 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039384999999999995, + "loss": 0.9134, + "step": 15770 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003941, + "loss": 0.9079, + "step": 15780 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039434999999999996, + "loss": 0.9062, + "step": 15790 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039459999999999994, + "loss": 0.9015, + "step": 15800 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039485, + "loss": 0.905, + "step": 15810 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039509999999999995, + "loss": 0.917, + "step": 15820 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039535, + "loss": 0.9032, + "step": 15830 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039559999999999997, + "loss": 0.9049, + "step": 15840 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039584999999999995, + "loss": 0.9183, + "step": 15850 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003961, + "loss": 0.9155, + "step": 15860 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039634999999999996, + "loss": 0.9321, + "step": 15870 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003966, + "loss": 0.9194, + "step": 15880 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039684999999999997, + "loss": 0.9276, + "step": 15890 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003971, + "loss": 0.9179, + "step": 15900 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039735, + "loss": 0.9153, + "step": 15910 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039759999999999996, + "loss": 0.9091, + "step": 15920 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039785, + "loss": 0.8992, + "step": 15930 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003981, + "loss": 0.9248, + "step": 15940 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039835, + "loss": 0.8834, + "step": 15950 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003986, + "loss": 0.8917, + "step": 15960 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003988499999999999, + "loss": 0.9172, + "step": 15970 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003991, + "loss": 0.9126, + "step": 15980 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003993499999999999, + "loss": 0.9144, + "step": 15990 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003996, + "loss": 0.927, + "step": 16000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00039984999999999993, + "loss": 0.9193, + "step": 16010 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004000999999999999, + "loss": 0.9001, + "step": 16020 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040034999999999995, + "loss": 0.9074, + "step": 16030 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004005999999999999, + "loss": 0.8888, + "step": 16040 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040084999999999996, + "loss": 0.9051, + "step": 16050 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040109999999999994, + "loss": 0.886, + "step": 16060 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040134999999999997, + "loss": 0.8988, + "step": 16070 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040159999999999995, + "loss": 0.8789, + "step": 16080 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040184999999999993, + "loss": 0.8828, + "step": 16090 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040209999999999996, + "loss": 0.8723, + "step": 16100 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040234999999999994, + "loss": 0.9006, + "step": 16110 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004026, + "loss": 0.8891, + "step": 16120 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040284999999999995, + "loss": 0.8801, + "step": 16130 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040309999999999993, + "loss": 0.8827, + "step": 16140 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040334999999999997, + "loss": 0.8866, + "step": 16150 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040359999999999994, + "loss": 0.8691, + "step": 16160 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040385, + "loss": 0.8749, + "step": 16170 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040409999999999996, + "loss": 0.8902, + "step": 16180 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040435, + "loss": 0.9015, + "step": 16190 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040459999999999997, + "loss": 0.9031, + "step": 16200 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040484999999999995, + "loss": 0.9151, + "step": 16210 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004051, + "loss": 0.9132, + "step": 16220 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040534999999999996, + "loss": 0.9079, + "step": 16230 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004056, + "loss": 0.918, + "step": 16240 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040584999999999997, + "loss": 0.9121, + "step": 16250 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040609999999999995, + "loss": 0.9106, + "step": 16260 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040635, + "loss": 0.9031, + "step": 16270 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040659999999999996, + "loss": 0.8931, + "step": 16280 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040685, + "loss": 0.8999, + "step": 16290 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004071, + "loss": 0.9123, + "step": 16300 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040734999999999995, + "loss": 0.9201, + "step": 16310 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004076, + "loss": 0.9105, + "step": 16320 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040784999999999997, + "loss": 0.9038, + "step": 16330 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004081, + "loss": 0.8916, + "step": 16340 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040835, + "loss": 0.9051, + "step": 16350 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004086, + "loss": 0.9035, + "step": 16360 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040885, + "loss": 0.9139, + "step": 16370 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004090999999999999, + "loss": 0.9228, + "step": 16380 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040935, + "loss": 0.9001, + "step": 16390 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040959999999999993, + "loss": 0.8981, + "step": 16400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00040985, + "loss": 0.8796, + "step": 16410 + }, + { + "epoch": 0.16, + "learning_rate": 0.00041009999999999994, + "loss": 0.8982, + "step": 16420 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004103499999999999, + "loss": 0.9092, + "step": 16430 + }, + { + "epoch": 0.16, + "learning_rate": 0.00041059999999999995, + "loss": 0.9078, + "step": 16440 + }, + { + "epoch": 0.16, + "learning_rate": 0.00041084999999999993, + "loss": 0.9136, + "step": 16450 + }, + { + "epoch": 0.16, + "learning_rate": 0.00041109999999999996, + "loss": 0.8812, + "step": 16460 + }, + { + "epoch": 0.16, + "learning_rate": 0.00041134999999999994, + "loss": 0.8807, + "step": 16470 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004116, + "loss": 0.893, + "step": 16480 + }, + { + "epoch": 0.16, + "learning_rate": 0.00041184999999999995, + "loss": 0.8879, + "step": 16490 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041209999999999993, + "loss": 0.8825, + "step": 16500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041234999999999997, + "loss": 0.8709, + "step": 16510 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041259999999999995, + "loss": 0.8962, + "step": 16520 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041285, + "loss": 0.8799, + "step": 16530 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041309999999999996, + "loss": 0.9039, + "step": 16540 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041334999999999994, + "loss": 0.8811, + "step": 16550 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041359999999999997, + "loss": 0.8934, + "step": 16560 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041384999999999995, + "loss": 0.8987, + "step": 16570 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004141, + "loss": 0.8806, + "step": 16580 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041434999999999996, + "loss": 0.8867, + "step": 16590 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041459999999999994, + "loss": 0.8716, + "step": 16600 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041485, + "loss": 0.8798, + "step": 16610 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041509999999999995, + "loss": 0.8875, + "step": 16620 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041535, + "loss": 0.8752, + "step": 16630 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041559999999999996, + "loss": 0.8721, + "step": 16640 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041585, + "loss": 0.8783, + "step": 16650 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004161, + "loss": 0.8941, + "step": 16660 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041634999999999996, + "loss": 0.9065, + "step": 16670 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004166, + "loss": 0.8996, + "step": 16680 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041684999999999997, + "loss": 0.9105, + "step": 16690 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004171, + "loss": 0.9004, + "step": 16700 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041735, + "loss": 0.8908, + "step": 16710 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041759999999999996, + "loss": 0.8875, + "step": 16720 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004178249999999999, + "loss": 0.8938, + "step": 16730 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041807499999999996, + "loss": 0.8937, + "step": 16740 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041832499999999994, + "loss": 0.9222, + "step": 16750 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041857499999999997, + "loss": 0.905, + "step": 16760 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041882499999999995, + "loss": 0.9112, + "step": 16770 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041907499999999993, + "loss": 0.875, + "step": 16780 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041932499999999996, + "loss": 0.9051, + "step": 16790 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041957499999999994, + "loss": 0.8955, + "step": 16800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00041982499999999997, + "loss": 0.8845, + "step": 16810 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042007499999999995, + "loss": 0.8922, + "step": 16820 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042032499999999993, + "loss": 0.8942, + "step": 16830 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042057499999999996, + "loss": 0.8848, + "step": 16840 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042082499999999994, + "loss": 0.8988, + "step": 16850 + }, + { + "epoch": 0.17, + "learning_rate": 0.000421075, + "loss": 0.9023, + "step": 16860 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042132499999999995, + "loss": 0.8887, + "step": 16870 + }, + { + "epoch": 0.17, + "learning_rate": 0.000421575, + "loss": 0.8788, + "step": 16880 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042182499999999997, + "loss": 0.9045, + "step": 16890 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042207499999999995, + "loss": 0.8972, + "step": 16900 + }, + { + "epoch": 0.17, + "learning_rate": 0.000422325, + "loss": 0.8951, + "step": 16910 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042257499999999996, + "loss": 0.9044, + "step": 16920 + }, + { + "epoch": 0.17, + "learning_rate": 0.000422825, + "loss": 0.919, + "step": 16930 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042307499999999997, + "loss": 0.9027, + "step": 16940 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042332499999999995, + "loss": 0.9085, + "step": 16950 + }, + { + "epoch": 0.17, + "learning_rate": 0.000423575, + "loss": 0.9036, + "step": 16960 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042382499999999996, + "loss": 0.8956, + "step": 16970 + }, + { + "epoch": 0.17, + "learning_rate": 0.000424075, + "loss": 0.8858, + "step": 16980 + }, + { + "epoch": 0.17, + "learning_rate": 0.000424325, + "loss": 0.8867, + "step": 16990 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042457499999999995, + "loss": 0.8947, + "step": 17000 + }, + { + "epoch": 0.17, + "learning_rate": 0.000424825, + "loss": 0.8837, + "step": 17010 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042507499999999996, + "loss": 0.8845, + "step": 17020 + }, + { + "epoch": 0.17, + "learning_rate": 0.000425325, + "loss": 0.9002, + "step": 17030 + }, + { + "epoch": 0.17, + "learning_rate": 0.000425575, + "loss": 0.903, + "step": 17040 + }, + { + "epoch": 0.17, + "learning_rate": 0.000425825, + "loss": 0.9118, + "step": 17050 + }, + { + "epoch": 0.17, + "learning_rate": 0.000426075, + "loss": 0.8913, + "step": 17060 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004263249999999999, + "loss": 0.8945, + "step": 17070 + }, + { + "epoch": 0.17, + "learning_rate": 0.000426575, + "loss": 0.886, + "step": 17080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004268249999999999, + "loss": 0.8845, + "step": 17090 + }, + { + "epoch": 0.17, + "learning_rate": 0.000427075, + "loss": 0.882, + "step": 17100 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042732499999999994, + "loss": 0.901, + "step": 17110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004275749999999999, + "loss": 0.8849, + "step": 17120 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042782499999999995, + "loss": 0.8931, + "step": 17130 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042807499999999993, + "loss": 0.886, + "step": 17140 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042832499999999996, + "loss": 0.8738, + "step": 17150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042857499999999994, + "loss": 0.8694, + "step": 17160 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004288249999999999, + "loss": 0.8888, + "step": 17170 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042907499999999995, + "loss": 0.8771, + "step": 17180 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042932499999999993, + "loss": 0.8846, + "step": 17190 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042957499999999997, + "loss": 0.8768, + "step": 17200 + }, + { + "epoch": 0.17, + "learning_rate": 0.00042982499999999994, + "loss": 0.8796, + "step": 17210 + }, + { + "epoch": 0.17, + "learning_rate": 0.000430075, + "loss": 0.8849, + "step": 17220 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043032499999999996, + "loss": 0.9074, + "step": 17230 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043057499999999994, + "loss": 0.8949, + "step": 17240 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043082499999999997, + "loss": 0.8993, + "step": 17250 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043107499999999995, + "loss": 0.9037, + "step": 17260 + }, + { + "epoch": 0.17, + "learning_rate": 0.000431325, + "loss": 0.888, + "step": 17270 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043157499999999996, + "loss": 0.8889, + "step": 17280 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043182499999999994, + "loss": 0.8938, + "step": 17290 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043207499999999997, + "loss": 0.8967, + "step": 17300 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043232499999999995, + "loss": 0.8824, + "step": 17310 + }, + { + "epoch": 0.17, + "learning_rate": 0.000432575, + "loss": 0.8823, + "step": 17320 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043282499999999996, + "loss": 0.9003, + "step": 17330 + }, + { + "epoch": 0.17, + "learning_rate": 0.000433075, + "loss": 0.9115, + "step": 17340 + }, + { + "epoch": 0.17, + "learning_rate": 0.000433325, + "loss": 0.8907, + "step": 17350 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043357499999999995, + "loss": 0.9, + "step": 17360 + }, + { + "epoch": 0.17, + "learning_rate": 0.000433825, + "loss": 0.8947, + "step": 17370 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043407499999999997, + "loss": 0.8867, + "step": 17380 + }, + { + "epoch": 0.17, + "learning_rate": 0.000434325, + "loss": 0.8783, + "step": 17390 + }, + { + "epoch": 0.17, + "learning_rate": 0.000434575, + "loss": 0.8778, + "step": 17400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043482499999999996, + "loss": 0.8951, + "step": 17410 + }, + { + "epoch": 0.17, + "learning_rate": 0.000435075, + "loss": 0.8697, + "step": 17420 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043532499999999997, + "loss": 0.887, + "step": 17430 + }, + { + "epoch": 0.17, + "learning_rate": 0.000435575, + "loss": 0.8831, + "step": 17440 + }, + { + "epoch": 0.17, + "learning_rate": 0.000435825, + "loss": 0.8838, + "step": 17450 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004360749999999999, + "loss": 0.8913, + "step": 17460 + }, + { + "epoch": 0.17, + "learning_rate": 0.000436325, + "loss": 0.8844, + "step": 17470 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004365749999999999, + "loss": 0.8904, + "step": 17480 + }, + { + "epoch": 0.17, + "learning_rate": 0.000436825, + "loss": 0.893, + "step": 17490 + }, + { + "epoch": 0.17, + "learning_rate": 0.00043707499999999993, + "loss": 0.8983, + "step": 17500 + }, + { + "epoch": 0.17, + "eval_accuracy": 0.8182743179218973, + "eval_loss": 0.91259765625, + "eval_runtime": 97.2312, + "eval_samples_per_second": 822.781, + "eval_steps_per_second": 1.615, + "step": 17500 + }, + { + "epoch": 0.18, + "learning_rate": 0.000437325, + "loss": 0.8869, + "step": 17510 + }, + { + "epoch": 0.18, + "learning_rate": 0.00043757499999999994, + "loss": 0.892, + "step": 17520 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004378249999999999, + "loss": 0.8886, + "step": 17530 + }, + { + "epoch": 0.18, + "learning_rate": 0.00043807499999999996, + "loss": 0.8904, + "step": 17540 + }, + { + "epoch": 0.18, + "learning_rate": 0.00043832499999999993, + "loss": 0.8751, + "step": 17550 + }, + { + "epoch": 0.18, + "learning_rate": 0.00043857499999999997, + "loss": 0.8874, + "step": 17560 + }, + { + "epoch": 0.18, + "learning_rate": 0.00043882499999999995, + "loss": 0.8813, + "step": 17570 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004390749999999999, + "loss": 0.8831, + "step": 17580 + }, + { + "epoch": 0.18, + "learning_rate": 0.00043932499999999996, + "loss": 0.8802, + "step": 17590 + }, + { + "epoch": 0.18, + "learning_rate": 0.00043957499999999994, + "loss": 0.8797, + "step": 17600 + }, + { + "epoch": 0.18, + "learning_rate": 0.00043982499999999997, + "loss": 0.8671, + "step": 17610 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044007499999999995, + "loss": 0.8755, + "step": 17620 + }, + { + "epoch": 0.18, + "learning_rate": 0.000440325, + "loss": 0.8715, + "step": 17630 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044057499999999996, + "loss": 0.8605, + "step": 17640 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044082499999999994, + "loss": 0.859, + "step": 17650 + }, + { + "epoch": 0.18, + "learning_rate": 0.000441075, + "loss": 0.8716, + "step": 17660 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044132499999999995, + "loss": 0.8661, + "step": 17670 + }, + { + "epoch": 0.18, + "learning_rate": 0.000441575, + "loss": 0.8838, + "step": 17680 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044182499999999997, + "loss": 0.8765, + "step": 17690 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044207499999999994, + "loss": 0.866, + "step": 17700 + }, + { + "epoch": 0.18, + "learning_rate": 0.000442325, + "loss": 0.8841, + "step": 17710 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044257499999999996, + "loss": 0.8905, + "step": 17720 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004428, + "loss": 0.8865, + "step": 17730 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044305, + "loss": 0.8793, + "step": 17740 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004433, + "loss": 0.8794, + "step": 17750 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004435499999999999, + "loss": 0.875, + "step": 17760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004438, + "loss": 0.8675, + "step": 17770 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004440499999999999, + "loss": 0.8694, + "step": 17780 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004443, + "loss": 0.8795, + "step": 17790 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044454999999999994, + "loss": 0.8764, + "step": 17800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004447999999999999, + "loss": 0.8657, + "step": 17810 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044504999999999995, + "loss": 0.8679, + "step": 17820 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044529999999999993, + "loss": 0.8732, + "step": 17830 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044554999999999996, + "loss": 0.8791, + "step": 17840 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044579999999999994, + "loss": 0.8636, + "step": 17850 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004460499999999999, + "loss": 0.882, + "step": 17860 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044629999999999995, + "loss": 0.8777, + "step": 17870 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044654999999999993, + "loss": 0.8735, + "step": 17880 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044679999999999996, + "loss": 0.8776, + "step": 17890 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044704999999999994, + "loss": 0.891, + "step": 17900 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004473, + "loss": 0.9082, + "step": 17910 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044754999999999996, + "loss": 0.9046, + "step": 17920 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044779999999999993, + "loss": 0.8918, + "step": 17930 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044804999999999997, + "loss": 0.8849, + "step": 17940 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044829999999999995, + "loss": 0.8936, + "step": 17950 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044855, + "loss": 0.8865, + "step": 17960 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044879999999999996, + "loss": 0.8825, + "step": 17970 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044904999999999994, + "loss": 0.8978, + "step": 17980 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044929999999999997, + "loss": 0.8707, + "step": 17990 + }, + { + "epoch": 0.18, + "learning_rate": 0.00044954999999999995, + "loss": 0.8807, + "step": 18000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004498, + "loss": 0.8942, + "step": 18010 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045004999999999996, + "loss": 0.8926, + "step": 18020 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045029999999999994, + "loss": 0.8825, + "step": 18030 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045055, + "loss": 0.8993, + "step": 18040 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045079999999999995, + "loss": 0.896, + "step": 18050 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045105, + "loss": 0.8884, + "step": 18060 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045129999999999997, + "loss": 0.8755, + "step": 18070 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045155, + "loss": 0.8749, + "step": 18080 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004518, + "loss": 0.9011, + "step": 18090 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045204999999999996, + "loss": 0.8811, + "step": 18100 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004523, + "loss": 0.8883, + "step": 18110 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045254999999999997, + "loss": 0.8898, + "step": 18120 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004528, + "loss": 0.8912, + "step": 18130 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045305, + "loss": 0.8892, + "step": 18140 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004532999999999999, + "loss": 0.8652, + "step": 18150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045355, + "loss": 0.8985, + "step": 18160 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004537999999999999, + "loss": 0.8885, + "step": 18170 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045405, + "loss": 0.8844, + "step": 18180 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045429999999999993, + "loss": 0.8754, + "step": 18190 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045455, + "loss": 0.8633, + "step": 18200 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045479999999999994, + "loss": 0.8808, + "step": 18210 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004550499999999999, + "loss": 0.8859, + "step": 18220 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045529999999999995, + "loss": 0.8836, + "step": 18230 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045554999999999993, + "loss": 0.885, + "step": 18240 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045579999999999997, + "loss": 0.8675, + "step": 18250 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045604999999999995, + "loss": 0.8838, + "step": 18260 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004562999999999999, + "loss": 0.8973, + "step": 18270 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045654999999999996, + "loss": 0.8845, + "step": 18280 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045679999999999994, + "loss": 0.8809, + "step": 18290 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045704999999999997, + "loss": 0.8767, + "step": 18300 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045729999999999995, + "loss": 0.8878, + "step": 18310 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045754999999999993, + "loss": 0.8706, + "step": 18320 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045779999999999996, + "loss": 0.884, + "step": 18330 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045804999999999994, + "loss": 0.8678, + "step": 18340 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045829999999999997, + "loss": 0.8785, + "step": 18350 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045854999999999995, + "loss": 0.8933, + "step": 18360 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004588, + "loss": 0.8826, + "step": 18370 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045904999999999996, + "loss": 0.8625, + "step": 18380 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045929999999999994, + "loss": 0.8767, + "step": 18390 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045955, + "loss": 0.8611, + "step": 18400 + }, + { + "epoch": 0.18, + "learning_rate": 0.00045979999999999995, + "loss": 0.8732, + "step": 18410 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046005, + "loss": 0.8704, + "step": 18420 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046029999999999997, + "loss": 0.8753, + "step": 18430 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046054999999999995, + "loss": 0.8651, + "step": 18440 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004608, + "loss": 0.8572, + "step": 18450 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046104999999999996, + "loss": 0.85, + "step": 18460 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004613, + "loss": 0.8867, + "step": 18470 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046154999999999997, + "loss": 0.8704, + "step": 18480 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004618, + "loss": 0.8707, + "step": 18490 + }, + { + "epoch": 0.18, + "learning_rate": 0.00046205, + "loss": 0.8768, + "step": 18500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046229999999999996, + "loss": 0.8776, + "step": 18510 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046255, + "loss": 0.8515, + "step": 18520 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004628, + "loss": 0.8539, + "step": 18530 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046305, + "loss": 0.8578, + "step": 18540 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004633, + "loss": 0.8909, + "step": 18550 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004635499999999999, + "loss": 0.8641, + "step": 18560 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004638, + "loss": 0.8626, + "step": 18570 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004640499999999999, + "loss": 0.8804, + "step": 18580 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004643, + "loss": 0.8824, + "step": 18590 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046454999999999993, + "loss": 0.88, + "step": 18600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004647999999999999, + "loss": 0.8648, + "step": 18610 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046504999999999995, + "loss": 0.8716, + "step": 18620 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004652999999999999, + "loss": 0.871, + "step": 18630 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046554999999999996, + "loss": 0.884, + "step": 18640 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046579999999999994, + "loss": 0.8656, + "step": 18650 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046604999999999997, + "loss": 0.8872, + "step": 18660 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046629999999999995, + "loss": 0.8771, + "step": 18670 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046654999999999993, + "loss": 0.8846, + "step": 18680 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046679999999999996, + "loss": 0.8677, + "step": 18690 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046704999999999994, + "loss": 0.8832, + "step": 18700 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004673, + "loss": 0.8815, + "step": 18710 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046754999999999995, + "loss": 0.8752, + "step": 18720 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046777499999999997, + "loss": 0.8859, + "step": 18730 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046802499999999995, + "loss": 0.8812, + "step": 18740 + }, + { + "epoch": 0.19, + "learning_rate": 0.000468275, + "loss": 0.8743, + "step": 18750 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046852499999999996, + "loss": 0.8647, + "step": 18760 + }, + { + "epoch": 0.19, + "learning_rate": 0.000468775, + "loss": 0.8854, + "step": 18770 + }, + { + "epoch": 0.19, + "learning_rate": 0.000469025, + "loss": 0.8724, + "step": 18780 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046927499999999995, + "loss": 0.8696, + "step": 18790 + }, + { + "epoch": 0.19, + "learning_rate": 0.000469525, + "loss": 0.8873, + "step": 18800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00046977499999999997, + "loss": 0.8876, + "step": 18810 + }, + { + "epoch": 0.19, + "learning_rate": 0.000470025, + "loss": 0.874, + "step": 18820 + }, + { + "epoch": 0.19, + "learning_rate": 0.000470275, + "loss": 0.8747, + "step": 18830 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004705249999999999, + "loss": 0.8735, + "step": 18840 + }, + { + "epoch": 0.19, + "learning_rate": 0.000470775, + "loss": 0.8863, + "step": 18850 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004710249999999999, + "loss": 0.8821, + "step": 18860 + }, + { + "epoch": 0.19, + "learning_rate": 0.000471275, + "loss": 0.8747, + "step": 18870 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047152499999999993, + "loss": 0.8709, + "step": 18880 + }, + { + "epoch": 0.19, + "learning_rate": 0.000471775, + "loss": 0.8837, + "step": 18890 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047202499999999994, + "loss": 0.8673, + "step": 18900 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004722749999999999, + "loss": 0.8712, + "step": 18910 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047252499999999995, + "loss": 0.8723, + "step": 18920 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047277499999999993, + "loss": 0.8574, + "step": 18930 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047302499999999996, + "loss": 0.8789, + "step": 18940 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047327499999999994, + "loss": 0.8872, + "step": 18950 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004735249999999999, + "loss": 0.8802, + "step": 18960 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047377499999999996, + "loss": 0.8921, + "step": 18970 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047402499999999993, + "loss": 0.8751, + "step": 18980 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047427499999999997, + "loss": 0.8665, + "step": 18990 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047452499999999995, + "loss": 0.8721, + "step": 19000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004747749999999999, + "loss": 0.8839, + "step": 19010 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047502499999999996, + "loss": 0.8676, + "step": 19020 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047527499999999994, + "loss": 0.8704, + "step": 19030 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047552499999999997, + "loss": 0.8782, + "step": 19040 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047577499999999995, + "loss": 0.8763, + "step": 19050 + }, + { + "epoch": 0.19, + "learning_rate": 0.000476025, + "loss": 0.8463, + "step": 19060 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047627499999999996, + "loss": 0.8617, + "step": 19070 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047652499999999994, + "loss": 0.8501, + "step": 19080 + }, + { + "epoch": 0.19, + "learning_rate": 0.000476775, + "loss": 0.8741, + "step": 19090 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047702499999999995, + "loss": 0.876, + "step": 19100 + }, + { + "epoch": 0.19, + "learning_rate": 0.000477275, + "loss": 0.8645, + "step": 19110 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047752499999999997, + "loss": 0.8666, + "step": 19120 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047777499999999994, + "loss": 0.869, + "step": 19130 + }, + { + "epoch": 0.19, + "learning_rate": 0.000478025, + "loss": 0.8542, + "step": 19140 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047827499999999996, + "loss": 0.8722, + "step": 19150 + }, + { + "epoch": 0.19, + "learning_rate": 0.000478525, + "loss": 0.8885, + "step": 19160 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047877499999999997, + "loss": 0.876, + "step": 19170 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047902499999999995, + "loss": 0.8765, + "step": 19180 + }, + { + "epoch": 0.19, + "learning_rate": 0.000479275, + "loss": 0.8642, + "step": 19190 + }, + { + "epoch": 0.19, + "learning_rate": 0.00047952499999999996, + "loss": 0.8772, + "step": 19200 + }, + { + "epoch": 0.19, + "learning_rate": 0.000479775, + "loss": 0.8533, + "step": 19210 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048002499999999997, + "loss": 0.8791, + "step": 19220 + }, + { + "epoch": 0.19, + "learning_rate": 0.000480275, + "loss": 0.8693, + "step": 19230 + }, + { + "epoch": 0.19, + "learning_rate": 0.000480525, + "loss": 0.871, + "step": 19240 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004807749999999999, + "loss": 0.8712, + "step": 19250 + }, + { + "epoch": 0.19, + "learning_rate": 0.000481025, + "loss": 0.8774, + "step": 19260 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004812749999999999, + "loss": 0.8788, + "step": 19270 + }, + { + "epoch": 0.19, + "learning_rate": 0.000481525, + "loss": 0.873, + "step": 19280 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048177499999999993, + "loss": 0.8738, + "step": 19290 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004820249999999999, + "loss": 0.8689, + "step": 19300 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048227499999999995, + "loss": 0.8711, + "step": 19310 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004825249999999999, + "loss": 0.8706, + "step": 19320 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048277499999999996, + "loss": 0.8562, + "step": 19330 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048302499999999994, + "loss": 0.8691, + "step": 19340 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048327499999999997, + "loss": 0.8735, + "step": 19350 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048352499999999995, + "loss": 0.8635, + "step": 19360 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048377499999999993, + "loss": 0.855, + "step": 19370 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048402499999999996, + "loss": 0.8673, + "step": 19380 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048427499999999994, + "loss": 0.8831, + "step": 19390 + }, + { + "epoch": 0.19, + "learning_rate": 0.000484525, + "loss": 0.8582, + "step": 19400 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048477499999999995, + "loss": 0.8718, + "step": 19410 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048502499999999993, + "loss": 0.8744, + "step": 19420 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048527499999999996, + "loss": 0.8592, + "step": 19430 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048552499999999994, + "loss": 0.8467, + "step": 19440 + }, + { + "epoch": 0.19, + "learning_rate": 0.000485775, + "loss": 0.8335, + "step": 19450 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048602499999999996, + "loss": 0.8515, + "step": 19460 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048627499999999993, + "loss": 0.8433, + "step": 19470 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048652499999999997, + "loss": 0.85, + "step": 19480 + }, + { + "epoch": 0.19, + "learning_rate": 0.00048677499999999995, + "loss": 0.8501, + "step": 19490 + }, + { + "epoch": 0.2, + "learning_rate": 0.000487025, + "loss": 0.8527, + "step": 19500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048727499999999996, + "loss": 0.8431, + "step": 19510 + }, + { + "epoch": 0.2, + "learning_rate": 0.000487525, + "loss": 0.8531, + "step": 19520 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048777499999999997, + "loss": 0.8515, + "step": 19530 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048802499999999995, + "loss": 0.8508, + "step": 19540 + }, + { + "epoch": 0.2, + "learning_rate": 0.000488275, + "loss": 0.846, + "step": 19550 + }, + { + "epoch": 0.2, + "learning_rate": 0.000488525, + "loss": 0.8413, + "step": 19560 + }, + { + "epoch": 0.2, + "learning_rate": 0.000488775, + "loss": 0.8532, + "step": 19570 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004890249999999999, + "loss": 0.8424, + "step": 19580 + }, + { + "epoch": 0.2, + "learning_rate": 0.000489275, + "loss": 0.8488, + "step": 19590 + }, + { + "epoch": 0.2, + "learning_rate": 0.000489525, + "loss": 0.8475, + "step": 19600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004897749999999999, + "loss": 0.8432, + "step": 19610 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004900249999999999, + "loss": 0.8494, + "step": 19620 + }, + { + "epoch": 0.2, + "learning_rate": 0.000490275, + "loss": 0.849, + "step": 19630 + }, + { + "epoch": 0.2, + "learning_rate": 0.000490525, + "loss": 0.8531, + "step": 19640 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004907749999999999, + "loss": 0.8527, + "step": 19650 + }, + { + "epoch": 0.2, + "learning_rate": 0.000491025, + "loss": 0.8762, + "step": 19660 + }, + { + "epoch": 0.2, + "learning_rate": 0.000491275, + "loss": 0.867, + "step": 19670 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004915249999999999, + "loss": 0.8551, + "step": 19680 + }, + { + "epoch": 0.2, + "learning_rate": 0.000491775, + "loss": 0.854, + "step": 19690 + }, + { + "epoch": 0.2, + "learning_rate": 0.000492025, + "loss": 0.868, + "step": 19700 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004922749999999999, + "loss": 0.8572, + "step": 19710 + }, + { + "epoch": 0.2, + "learning_rate": 0.000492525, + "loss": 0.863, + "step": 19720 + }, + { + "epoch": 0.2, + "learning_rate": 0.000492775, + "loss": 0.8684, + "step": 19730 + }, + { + "epoch": 0.2, + "learning_rate": 0.000493, + "loss": 0.8623, + "step": 19740 + }, + { + "epoch": 0.2, + "learning_rate": 0.00049325, + "loss": 0.8543, + "step": 19750 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004935, + "loss": 0.8525, + "step": 19760 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004937499999999999, + "loss": 0.8636, + "step": 19770 + }, + { + "epoch": 0.2, + "learning_rate": 0.000494, + "loss": 0.8801, + "step": 19780 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004942499999999999, + "loss": 0.8639, + "step": 19790 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004944999999999999, + "loss": 0.8695, + "step": 19800 + }, + { + "epoch": 0.2, + "learning_rate": 0.00049475, + "loss": 0.8659, + "step": 19810 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004949999999999999, + "loss": 0.864, + "step": 19820 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004952499999999999, + "loss": 0.8424, + "step": 19830 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004955, + "loss": 0.8659, + "step": 19840 + }, + { + "epoch": 0.2, + "learning_rate": 0.00049575, + "loss": 0.8535, + "step": 19850 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004959999999999999, + "loss": 0.871, + "step": 19860 + }, + { + "epoch": 0.2, + "learning_rate": 0.00049625, + "loss": 0.8599, + "step": 19870 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004965, + "loss": 0.849, + "step": 19880 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004967499999999999, + "loss": 0.8578, + "step": 19890 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004969999999999999, + "loss": 0.8619, + "step": 19900 + }, + { + "epoch": 0.2, + "learning_rate": 0.00049725, + "loss": 0.8659, + "step": 19910 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004975, + "loss": 0.866, + "step": 19920 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004977499999999999, + "loss": 0.8616, + "step": 19930 + }, + { + "epoch": 0.2, + "learning_rate": 0.000498, + "loss": 0.8478, + "step": 19940 + }, + { + "epoch": 0.2, + "learning_rate": 0.00049825, + "loss": 0.8596, + "step": 19950 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004984999999999999, + "loss": 0.862, + "step": 19960 + }, + { + "epoch": 0.2, + "learning_rate": 0.00049875, + "loss": 0.8678, + "step": 19970 + }, + { + "epoch": 0.2, + "learning_rate": 0.000499, + "loss": 0.879, + "step": 19980 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004992499999999999, + "loss": 0.8744, + "step": 19990 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004994999999999999, + "loss": 0.8751, + "step": 20000 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.8198587930184829, + "eval_loss": 0.89892578125, + "eval_runtime": 96.5681, + "eval_samples_per_second": 828.431, + "eval_steps_per_second": 1.626, + "step": 20000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00049975, + "loss": 0.8638, + "step": 20010 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005, + "loss": 0.8699, + "step": 20020 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005002499999999999, + "loss": 0.8509, + "step": 20030 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005005, + "loss": 0.853, + "step": 20040 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050075, + "loss": 0.8575, + "step": 20050 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005009999999999999, + "loss": 0.8483, + "step": 20060 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050125, + "loss": 0.8648, + "step": 20070 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005015, + "loss": 0.8515, + "step": 20080 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050175, + "loss": 0.8699, + "step": 20090 + }, + { + "epoch": 0.2, + "learning_rate": 0.000502, + "loss": 0.8394, + "step": 20100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050225, + "loss": 0.8705, + "step": 20110 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005025, + "loss": 0.8578, + "step": 20120 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005027499999999999, + "loss": 0.8525, + "step": 20130 + }, + { + "epoch": 0.2, + "learning_rate": 0.000503, + "loss": 0.8574, + "step": 20140 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005032499999999999, + "loss": 0.8574, + "step": 20150 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005034999999999999, + "loss": 0.8503, + "step": 20160 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050375, + "loss": 0.859, + "step": 20170 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005039999999999999, + "loss": 0.8582, + "step": 20180 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005042499999999999, + "loss": 0.8535, + "step": 20190 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005045, + "loss": 0.8333, + "step": 20200 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050475, + "loss": 0.8377, + "step": 20210 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005049999999999999, + "loss": 0.8509, + "step": 20220 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050525, + "loss": 0.8661, + "step": 20230 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005055, + "loss": 0.8696, + "step": 20240 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005057499999999999, + "loss": 0.8537, + "step": 20250 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005059999999999999, + "loss": 0.8625, + "step": 20260 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050625, + "loss": 0.859, + "step": 20270 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005064999999999999, + "loss": 0.8617, + "step": 20280 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005067499999999999, + "loss": 0.8424, + "step": 20290 + }, + { + "epoch": 0.2, + "learning_rate": 0.000507, + "loss": 0.8521, + "step": 20300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050725, + "loss": 0.8474, + "step": 20310 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005074999999999999, + "loss": 0.8544, + "step": 20320 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050775, + "loss": 0.8454, + "step": 20330 + }, + { + "epoch": 0.2, + "learning_rate": 0.000508, + "loss": 0.8555, + "step": 20340 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005082499999999999, + "loss": 0.8385, + "step": 20350 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005085, + "loss": 0.8507, + "step": 20360 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050875, + "loss": 0.8535, + "step": 20370 + }, + { + "epoch": 0.2, + "learning_rate": 0.000509, + "loss": 0.8636, + "step": 20380 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005092499999999999, + "loss": 0.8672, + "step": 20390 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005095, + "loss": 0.8736, + "step": 20400 + }, + { + "epoch": 0.2, + "learning_rate": 0.00050975, + "loss": 0.8626, + "step": 20410 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005099999999999999, + "loss": 0.8581, + "step": 20420 + }, + { + "epoch": 0.2, + "learning_rate": 0.00051025, + "loss": 0.8369, + "step": 20430 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005105, + "loss": 0.8421, + "step": 20440 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005107499999999999, + "loss": 0.8572, + "step": 20450 + }, + { + "epoch": 0.2, + "learning_rate": 0.000511, + "loss": 0.8611, + "step": 20460 + }, + { + "epoch": 0.2, + "learning_rate": 0.00051125, + "loss": 0.849, + "step": 20470 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005115, + "loss": 0.8553, + "step": 20480 + }, + { + "epoch": 0.2, + "learning_rate": 0.0005117499999999999, + "loss": 0.8542, + "step": 20490 + }, + { + "epoch": 0.2, + "learning_rate": 0.000512, + "loss": 0.851, + "step": 20500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00051225, + "loss": 0.8633, + "step": 20510 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005124999999999999, + "loss": 0.8568, + "step": 20520 + }, + { + "epoch": 0.21, + "learning_rate": 0.00051275, + "loss": 0.8648, + "step": 20530 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005129999999999999, + "loss": 0.8609, + "step": 20540 + }, + { + "epoch": 0.21, + "learning_rate": 0.00051325, + "loss": 0.872, + "step": 20550 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005135, + "loss": 0.8543, + "step": 20560 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005137499999999999, + "loss": 0.8456, + "step": 20570 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005139999999999999, + "loss": 0.8625, + "step": 20580 + }, + { + "epoch": 0.21, + "learning_rate": 0.00051425, + "loss": 0.863, + "step": 20590 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005145, + "loss": 0.8661, + "step": 20600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005147499999999999, + "loss": 0.853, + "step": 20610 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005149999999999999, + "loss": 0.8355, + "step": 20620 + }, + { + "epoch": 0.21, + "learning_rate": 0.00051525, + "loss": 0.8516, + "step": 20630 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005154999999999999, + "loss": 0.8582, + "step": 20640 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005157499999999999, + "loss": 0.8616, + "step": 20650 + }, + { + "epoch": 0.21, + "learning_rate": 0.000516, + "loss": 0.872, + "step": 20660 + }, + { + "epoch": 0.21, + "learning_rate": 0.00051625, + "loss": 0.8662, + "step": 20670 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005164999999999999, + "loss": 0.8629, + "step": 20680 + }, + { + "epoch": 0.21, + "learning_rate": 0.00051675, + "loss": 0.8667, + "step": 20690 + }, + { + "epoch": 0.21, + "learning_rate": 0.000517, + "loss": 0.8714, + "step": 20700 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005172499999999999, + "loss": 0.8544, + "step": 20710 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005175, + "loss": 0.858, + "step": 20720 + }, + { + "epoch": 0.21, + "learning_rate": 0.00051775, + "loss": 0.8394, + "step": 20730 + }, + { + "epoch": 0.21, + "learning_rate": 0.000517975, + "loss": 0.8487, + "step": 20740 + }, + { + "epoch": 0.21, + "learning_rate": 0.000518225, + "loss": 0.8566, + "step": 20750 + }, + { + "epoch": 0.21, + "learning_rate": 0.000518475, + "loss": 0.8522, + "step": 20760 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005187249999999999, + "loss": 0.8654, + "step": 20770 + }, + { + "epoch": 0.21, + "learning_rate": 0.000518975, + "loss": 0.8526, + "step": 20780 + }, + { + "epoch": 0.21, + "learning_rate": 0.000519225, + "loss": 0.8596, + "step": 20790 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005194749999999999, + "loss": 0.8721, + "step": 20800 + }, + { + "epoch": 0.21, + "learning_rate": 0.000519725, + "loss": 0.8496, + "step": 20810 + }, + { + "epoch": 0.21, + "learning_rate": 0.000519975, + "loss": 0.8524, + "step": 20820 + }, + { + "epoch": 0.21, + "learning_rate": 0.000520225, + "loss": 0.8407, + "step": 20830 + }, + { + "epoch": 0.21, + "learning_rate": 0.000520475, + "loss": 0.842, + "step": 20840 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005207249999999999, + "loss": 0.8428, + "step": 20850 + }, + { + "epoch": 0.21, + "learning_rate": 0.000520975, + "loss": 0.8262, + "step": 20860 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005212249999999999, + "loss": 0.8374, + "step": 20870 + }, + { + "epoch": 0.21, + "learning_rate": 0.000521475, + "loss": 0.8292, + "step": 20880 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005217249999999999, + "loss": 0.8493, + "step": 20890 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005219749999999999, + "loss": 0.8546, + "step": 20900 + }, + { + "epoch": 0.21, + "learning_rate": 0.000522225, + "loss": 0.8577, + "step": 20910 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005224749999999999, + "loss": 0.8526, + "step": 20920 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005227249999999999, + "loss": 0.8755, + "step": 20930 + }, + { + "epoch": 0.21, + "learning_rate": 0.000522975, + "loss": 0.8588, + "step": 20940 + }, + { + "epoch": 0.21, + "learning_rate": 0.000523225, + "loss": 0.8551, + "step": 20950 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005234749999999999, + "loss": 0.8439, + "step": 20960 + }, + { + "epoch": 0.21, + "learning_rate": 0.000523725, + "loss": 0.8404, + "step": 20970 + }, + { + "epoch": 0.21, + "learning_rate": 0.000523975, + "loss": 0.8425, + "step": 20980 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005242249999999999, + "loss": 0.8488, + "step": 20990 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005244749999999999, + "loss": 0.8544, + "step": 21000 + }, + { + "epoch": 0.21, + "learning_rate": 0.000524725, + "loss": 0.8515, + "step": 21010 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005249749999999999, + "loss": 0.8592, + "step": 21020 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005252249999999999, + "loss": 0.8461, + "step": 21030 + }, + { + "epoch": 0.21, + "learning_rate": 0.000525475, + "loss": 0.8671, + "step": 21040 + }, + { + "epoch": 0.21, + "learning_rate": 0.000525725, + "loss": 0.848, + "step": 21050 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005259749999999999, + "loss": 0.8458, + "step": 21060 + }, + { + "epoch": 0.21, + "learning_rate": 0.000526225, + "loss": 0.8492, + "step": 21070 + }, + { + "epoch": 0.21, + "learning_rate": 0.000526475, + "loss": 0.8559, + "step": 21080 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005267249999999999, + "loss": 0.8533, + "step": 21090 + }, + { + "epoch": 0.21, + "learning_rate": 0.000526975, + "loss": 0.8399, + "step": 21100 + }, + { + "epoch": 0.21, + "learning_rate": 0.000527225, + "loss": 0.839, + "step": 21110 + }, + { + "epoch": 0.21, + "learning_rate": 0.000527475, + "loss": 0.8347, + "step": 21120 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005277249999999999, + "loss": 0.8422, + "step": 21130 + }, + { + "epoch": 0.21, + "learning_rate": 0.000527975, + "loss": 0.8471, + "step": 21140 + }, + { + "epoch": 0.21, + "learning_rate": 0.000528225, + "loss": 0.8427, + "step": 21150 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005284749999999999, + "loss": 0.8431, + "step": 21160 + }, + { + "epoch": 0.21, + "learning_rate": 0.000528725, + "loss": 0.8579, + "step": 21170 + }, + { + "epoch": 0.21, + "learning_rate": 0.000528975, + "loss": 0.8445, + "step": 21180 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005292249999999999, + "loss": 0.8808, + "step": 21190 + }, + { + "epoch": 0.21, + "learning_rate": 0.000529475, + "loss": 0.8514, + "step": 21200 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005297249999999999, + "loss": 0.8644, + "step": 21210 + }, + { + "epoch": 0.21, + "learning_rate": 0.000529975, + "loss": 0.8643, + "step": 21220 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005302249999999999, + "loss": 0.8652, + "step": 21230 + }, + { + "epoch": 0.21, + "learning_rate": 0.000530475, + "loss": 0.8588, + "step": 21240 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005307249999999999, + "loss": 0.8664, + "step": 21250 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005309749999999999, + "loss": 0.8559, + "step": 21260 + }, + { + "epoch": 0.21, + "learning_rate": 0.000531225, + "loss": 0.8528, + "step": 21270 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005314749999999999, + "loss": 0.8701, + "step": 21280 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005317249999999999, + "loss": 0.8587, + "step": 21290 + }, + { + "epoch": 0.21, + "learning_rate": 0.000531975, + "loss": 0.8486, + "step": 21300 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005322249999999999, + "loss": 0.834, + "step": 21310 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005324749999999999, + "loss": 0.8394, + "step": 21320 + }, + { + "epoch": 0.21, + "learning_rate": 0.000532725, + "loss": 0.8362, + "step": 21330 + }, + { + "epoch": 0.21, + "learning_rate": 0.000532975, + "loss": 0.8399, + "step": 21340 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005332249999999999, + "loss": 0.8561, + "step": 21350 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005334749999999999, + "loss": 0.8599, + "step": 21360 + }, + { + "epoch": 0.21, + "learning_rate": 0.000533725, + "loss": 0.8555, + "step": 21370 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005339749999999999, + "loss": 0.8606, + "step": 21380 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005342249999999999, + "loss": 0.8653, + "step": 21390 + }, + { + "epoch": 0.21, + "learning_rate": 0.000534475, + "loss": 0.8586, + "step": 21400 + }, + { + "epoch": 0.21, + "learning_rate": 0.000534725, + "loss": 0.8409, + "step": 21410 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005349749999999999, + "loss": 0.8412, + "step": 21420 + }, + { + "epoch": 0.21, + "learning_rate": 0.000535225, + "loss": 0.8464, + "step": 21430 + }, + { + "epoch": 0.21, + "learning_rate": 0.000535475, + "loss": 0.8684, + "step": 21440 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005357249999999999, + "loss": 0.8533, + "step": 21450 + }, + { + "epoch": 0.21, + "learning_rate": 0.000535975, + "loss": 0.8652, + "step": 21460 + }, + { + "epoch": 0.21, + "learning_rate": 0.000536225, + "loss": 0.8394, + "step": 21470 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005364749999999999, + "loss": 0.8395, + "step": 21480 + }, + { + "epoch": 0.21, + "learning_rate": 0.0005367249999999999, + "loss": 0.8405, + "step": 21490 + }, + { + "epoch": 0.21, + "learning_rate": 0.000536975, + "loss": 0.861, + "step": 21500 + }, + { + "epoch": 0.22, + "learning_rate": 0.000537225, + "loss": 0.8605, + "step": 21510 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005374749999999999, + "loss": 0.8449, + "step": 21520 + }, + { + "epoch": 0.22, + "learning_rate": 0.000537725, + "loss": 0.8417, + "step": 21530 + }, + { + "epoch": 0.22, + "learning_rate": 0.000537975, + "loss": 0.842, + "step": 21540 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005382249999999999, + "loss": 0.8467, + "step": 21550 + }, + { + "epoch": 0.22, + "learning_rate": 0.000538475, + "loss": 0.8455, + "step": 21560 + }, + { + "epoch": 0.22, + "learning_rate": 0.000538725, + "loss": 0.8414, + "step": 21570 + }, + { + "epoch": 0.22, + "learning_rate": 0.000538975, + "loss": 0.8568, + "step": 21580 + }, + { + "epoch": 0.22, + "learning_rate": 0.000539225, + "loss": 0.85, + "step": 21590 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005394749999999999, + "loss": 0.8696, + "step": 21600 + }, + { + "epoch": 0.22, + "learning_rate": 0.000539725, + "loss": 0.8536, + "step": 21610 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005399749999999999, + "loss": 0.8483, + "step": 21620 + }, + { + "epoch": 0.22, + "learning_rate": 0.000540225, + "loss": 0.8525, + "step": 21630 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005404749999999999, + "loss": 0.8526, + "step": 21640 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005407249999999999, + "loss": 0.8366, + "step": 21650 + }, + { + "epoch": 0.22, + "learning_rate": 0.000540975, + "loss": 0.8595, + "step": 21660 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005412249999999999, + "loss": 0.84, + "step": 21670 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005414749999999999, + "loss": 0.8594, + "step": 21680 + }, + { + "epoch": 0.22, + "learning_rate": 0.000541725, + "loss": 0.8491, + "step": 21690 + }, + { + "epoch": 0.22, + "learning_rate": 0.000541975, + "loss": 0.8567, + "step": 21700 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005422249999999999, + "loss": 0.8567, + "step": 21710 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005424749999999999, + "loss": 0.8429, + "step": 21720 + }, + { + "epoch": 0.22, + "learning_rate": 0.000542725, + "loss": 0.8601, + "step": 21730 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005429499999999999, + "loss": 0.8466, + "step": 21740 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005432, + "loss": 0.8704, + "step": 21750 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054345, + "loss": 0.851, + "step": 21760 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005436999999999999, + "loss": 0.8732, + "step": 21770 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054395, + "loss": 0.8686, + "step": 21780 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005442, + "loss": 0.852, + "step": 21790 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005444499999999999, + "loss": 0.8505, + "step": 21800 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005447, + "loss": 0.8207, + "step": 21810 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054495, + "loss": 0.8405, + "step": 21820 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005451999999999999, + "loss": 0.8289, + "step": 21830 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054545, + "loss": 0.8277, + "step": 21840 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005457, + "loss": 0.848, + "step": 21850 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054595, + "loss": 0.8432, + "step": 21860 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005461999999999999, + "loss": 0.8451, + "step": 21870 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054645, + "loss": 0.8518, + "step": 21880 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005467, + "loss": 0.8537, + "step": 21890 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005469499999999999, + "loss": 0.8512, + "step": 21900 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005472, + "loss": 0.8544, + "step": 21910 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054745, + "loss": 0.8408, + "step": 21920 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005477, + "loss": 0.8433, + "step": 21930 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054795, + "loss": 0.8379, + "step": 21940 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005481999999999999, + "loss": 0.8502, + "step": 21950 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005484499999999999, + "loss": 0.8576, + "step": 21960 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005487, + "loss": 0.8571, + "step": 21970 + }, + { + "epoch": 0.22, + "learning_rate": 0.00054895, + "loss": 0.8501, + "step": 21980 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005491999999999999, + "loss": 0.8382, + "step": 21990 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005494499999999999, + "loss": 0.8386, + "step": 22000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005497, + "loss": 0.8449, + "step": 22010 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005499499999999999, + "loss": 0.855, + "step": 22020 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005501999999999999, + "loss": 0.8483, + "step": 22030 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055045, + "loss": 0.8802, + "step": 22040 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005506999999999999, + "loss": 0.8487, + "step": 22050 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005509499999999999, + "loss": 0.8665, + "step": 22060 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005512, + "loss": 0.8622, + "step": 22070 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055145, + "loss": 0.8407, + "step": 22080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005516999999999999, + "loss": 0.8573, + "step": 22090 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005519499999999999, + "loss": 0.8576, + "step": 22100 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005522, + "loss": 0.8431, + "step": 22110 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005524499999999999, + "loss": 0.8333, + "step": 22120 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005526999999999999, + "loss": 0.8526, + "step": 22130 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055295, + "loss": 0.8583, + "step": 22140 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005532, + "loss": 0.8526, + "step": 22150 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005534499999999999, + "loss": 0.8519, + "step": 22160 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005537, + "loss": 0.8788, + "step": 22170 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055395, + "loss": 0.8524, + "step": 22180 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005541999999999999, + "loss": 0.8541, + "step": 22190 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055445, + "loss": 0.8464, + "step": 22200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005547, + "loss": 0.8489, + "step": 22210 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055495, + "loss": 0.84, + "step": 22220 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005551999999999999, + "loss": 0.8484, + "step": 22230 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055545, + "loss": 0.8545, + "step": 22240 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005557, + "loss": 0.8498, + "step": 22250 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005559499999999999, + "loss": 0.8379, + "step": 22260 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005562, + "loss": 0.8475, + "step": 22270 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055645, + "loss": 0.8605, + "step": 22280 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005566999999999999, + "loss": 0.8661, + "step": 22290 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055695, + "loss": 0.8457, + "step": 22300 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005571999999999999, + "loss": 0.8488, + "step": 22310 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055745, + "loss": 0.8541, + "step": 22320 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005577, + "loss": 0.8565, + "step": 22330 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005579499999999999, + "loss": 0.8472, + "step": 22340 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005581999999999999, + "loss": 0.8344, + "step": 22350 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005584499999999999, + "loss": 0.8216, + "step": 22360 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005587, + "loss": 0.8391, + "step": 22370 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005589499999999999, + "loss": 0.8371, + "step": 22380 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005591999999999999, + "loss": 0.85, + "step": 22390 + }, + { + "epoch": 0.22, + "learning_rate": 0.00055945, + "loss": 0.8419, + "step": 22400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005596999999999999, + "loss": 0.8556, + "step": 22410 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005599499999999999, + "loss": 0.8554, + "step": 22420 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005602, + "loss": 0.8672, + "step": 22430 + }, + { + "epoch": 0.22, + "learning_rate": 0.00056045, + "loss": 0.8626, + "step": 22440 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005606999999999999, + "loss": 0.8509, + "step": 22450 + }, + { + "epoch": 0.22, + "learning_rate": 0.00056095, + "loss": 0.8573, + "step": 22460 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005612, + "loss": 0.8384, + "step": 22470 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005614499999999999, + "loss": 0.8403, + "step": 22480 + }, + { + "epoch": 0.22, + "learning_rate": 0.0005616999999999999, + "loss": 0.8321, + "step": 22490 + }, + { + "epoch": 0.23, + "learning_rate": 0.00056195, + "loss": 0.8331, + "step": 22500 + }, + { + "epoch": 0.23, + "eval_accuracy": 0.8251687860262823, + "eval_loss": 0.86767578125, + "eval_runtime": 96.699, + "eval_samples_per_second": 827.309, + "eval_steps_per_second": 1.624, + "step": 22500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005622, + "loss": 0.8412, + "step": 22510 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005624499999999999, + "loss": 0.8565, + "step": 22520 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005627, + "loss": 0.8348, + "step": 22530 + }, + { + "epoch": 0.23, + "learning_rate": 0.00056295, + "loss": 0.8422, + "step": 22540 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005631999999999999, + "loss": 0.8546, + "step": 22550 + }, + { + "epoch": 0.23, + "learning_rate": 0.00056345, + "loss": 0.8493, + "step": 22560 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005637, + "loss": 0.8426, + "step": 22570 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005639499999999999, + "loss": 0.8362, + "step": 22580 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005641999999999999, + "loss": 0.8515, + "step": 22590 + }, + { + "epoch": 0.23, + "learning_rate": 0.00056445, + "loss": 0.8461, + "step": 22600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005647, + "loss": 0.8419, + "step": 22610 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005649499999999999, + "loss": 0.8442, + "step": 22620 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005652, + "loss": 0.8306, + "step": 22630 + }, + { + "epoch": 0.23, + "learning_rate": 0.00056545, + "loss": 0.8336, + "step": 22640 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005656999999999999, + "loss": 0.8601, + "step": 22650 + }, + { + "epoch": 0.23, + "learning_rate": 0.00056595, + "loss": 0.8297, + "step": 22660 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005662, + "loss": 0.8361, + "step": 22670 + }, + { + "epoch": 0.23, + "learning_rate": 0.00056645, + "loss": 0.8433, + "step": 22680 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005667, + "loss": 0.85, + "step": 22690 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005669499999999999, + "loss": 0.8516, + "step": 22700 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005672, + "loss": 0.8673, + "step": 22710 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005674499999999999, + "loss": 0.858, + "step": 22720 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005677, + "loss": 0.8526, + "step": 22730 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005679249999999999, + "loss": 0.8419, + "step": 22740 + }, + { + "epoch": 0.23, + "learning_rate": 0.000568175, + "loss": 0.8424, + "step": 22750 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005684249999999999, + "loss": 0.8331, + "step": 22760 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005686749999999999, + "loss": 0.8234, + "step": 22770 + }, + { + "epoch": 0.23, + "learning_rate": 0.000568925, + "loss": 0.8343, + "step": 22780 + }, + { + "epoch": 0.23, + "learning_rate": 0.000569175, + "loss": 0.8336, + "step": 22790 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005694249999999999, + "loss": 0.8394, + "step": 22800 + }, + { + "epoch": 0.23, + "learning_rate": 0.000569675, + "loss": 0.8365, + "step": 22810 + }, + { + "epoch": 0.23, + "learning_rate": 0.000569925, + "loss": 0.8417, + "step": 22820 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005701749999999999, + "loss": 0.8479, + "step": 22830 + }, + { + "epoch": 0.23, + "learning_rate": 0.000570425, + "loss": 0.8533, + "step": 22840 + }, + { + "epoch": 0.23, + "learning_rate": 0.000570675, + "loss": 0.8259, + "step": 22850 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005709249999999999, + "loss": 0.8621, + "step": 22860 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005711749999999999, + "loss": 0.8413, + "step": 22870 + }, + { + "epoch": 0.23, + "learning_rate": 0.000571425, + "loss": 0.8336, + "step": 22880 + }, + { + "epoch": 0.23, + "learning_rate": 0.000571675, + "loss": 0.8262, + "step": 22890 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005719249999999999, + "loss": 0.8366, + "step": 22900 + }, + { + "epoch": 0.23, + "learning_rate": 0.000572175, + "loss": 0.8295, + "step": 22910 + }, + { + "epoch": 0.23, + "learning_rate": 0.000572425, + "loss": 0.8373, + "step": 22920 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005726749999999999, + "loss": 0.8466, + "step": 22930 + }, + { + "epoch": 0.23, + "learning_rate": 0.000572925, + "loss": 0.8341, + "step": 22940 + }, + { + "epoch": 0.23, + "learning_rate": 0.000573175, + "loss": 0.8429, + "step": 22950 + }, + { + "epoch": 0.23, + "learning_rate": 0.000573425, + "loss": 0.8465, + "step": 22960 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005736749999999999, + "loss": 0.8499, + "step": 22970 + }, + { + "epoch": 0.23, + "learning_rate": 0.000573925, + "loss": 0.8308, + "step": 22980 + }, + { + "epoch": 0.23, + "learning_rate": 0.000574175, + "loss": 0.8424, + "step": 22990 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005744249999999999, + "loss": 0.8312, + "step": 23000 + }, + { + "epoch": 0.23, + "learning_rate": 0.000574675, + "loss": 0.8453, + "step": 23010 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005749249999999999, + "loss": 0.8476, + "step": 23020 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005751749999999999, + "loss": 0.8329, + "step": 23030 + }, + { + "epoch": 0.23, + "learning_rate": 0.000575425, + "loss": 0.8365, + "step": 23040 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005756749999999999, + "loss": 0.8487, + "step": 23050 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005759249999999999, + "loss": 0.8407, + "step": 23060 + }, + { + "epoch": 0.23, + "learning_rate": 0.000576175, + "loss": 0.8449, + "step": 23070 + }, + { + "epoch": 0.23, + "learning_rate": 0.000576425, + "loss": 0.8644, + "step": 23080 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005766749999999999, + "loss": 0.8564, + "step": 23090 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005769249999999999, + "loss": 0.8385, + "step": 23100 + }, + { + "epoch": 0.23, + "learning_rate": 0.000577175, + "loss": 0.8285, + "step": 23110 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005774249999999999, + "loss": 0.8194, + "step": 23120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005776749999999999, + "loss": 0.814, + "step": 23130 + }, + { + "epoch": 0.23, + "learning_rate": 0.000577925, + "loss": 0.8261, + "step": 23140 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005781749999999999, + "loss": 0.8141, + "step": 23150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005784249999999999, + "loss": 0.8089, + "step": 23160 + }, + { + "epoch": 0.23, + "learning_rate": 0.000578675, + "loss": 0.8003, + "step": 23170 + }, + { + "epoch": 0.23, + "learning_rate": 0.000578925, + "loss": 0.8148, + "step": 23180 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005791749999999999, + "loss": 0.814, + "step": 23190 + }, + { + "epoch": 0.23, + "learning_rate": 0.000579425, + "loss": 0.8206, + "step": 23200 + }, + { + "epoch": 0.23, + "learning_rate": 0.000579675, + "loss": 0.7938, + "step": 23210 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005799249999999999, + "loss": 0.8371, + "step": 23220 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005801749999999999, + "loss": 0.8515, + "step": 23230 + }, + { + "epoch": 0.23, + "learning_rate": 0.000580425, + "loss": 0.853, + "step": 23240 + }, + { + "epoch": 0.23, + "learning_rate": 0.000580675, + "loss": 0.8462, + "step": 23250 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005809249999999999, + "loss": 0.8354, + "step": 23260 + }, + { + "epoch": 0.23, + "learning_rate": 0.000581175, + "loss": 0.8352, + "step": 23270 + }, + { + "epoch": 0.23, + "learning_rate": 0.000581425, + "loss": 0.8431, + "step": 23280 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005816749999999999, + "loss": 0.8469, + "step": 23290 + }, + { + "epoch": 0.23, + "learning_rate": 0.000581925, + "loss": 0.8279, + "step": 23300 + }, + { + "epoch": 0.23, + "learning_rate": 0.000582175, + "loss": 0.8322, + "step": 23310 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005824249999999999, + "loss": 0.8279, + "step": 23320 + }, + { + "epoch": 0.23, + "learning_rate": 0.000582675, + "loss": 0.8376, + "step": 23330 + }, + { + "epoch": 0.23, + "learning_rate": 0.000582925, + "loss": 0.8371, + "step": 23340 + }, + { + "epoch": 0.23, + "learning_rate": 0.000583175, + "loss": 0.8421, + "step": 23350 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005834249999999999, + "loss": 0.8337, + "step": 23360 + }, + { + "epoch": 0.23, + "learning_rate": 0.000583675, + "loss": 0.8473, + "step": 23370 + }, + { + "epoch": 0.23, + "learning_rate": 0.000583925, + "loss": 0.8464, + "step": 23380 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005841749999999999, + "loss": 0.8401, + "step": 23390 + }, + { + "epoch": 0.23, + "learning_rate": 0.000584425, + "loss": 0.849, + "step": 23400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005846749999999999, + "loss": 0.8351, + "step": 23410 + }, + { + "epoch": 0.23, + "learning_rate": 0.000584925, + "loss": 0.8496, + "step": 23420 + }, + { + "epoch": 0.23, + "learning_rate": 0.000585175, + "loss": 0.8413, + "step": 23430 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005854249999999999, + "loss": 0.8396, + "step": 23440 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005856749999999999, + "loss": 0.8265, + "step": 23450 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005859249999999999, + "loss": 0.8421, + "step": 23460 + }, + { + "epoch": 0.23, + "learning_rate": 0.000586175, + "loss": 0.8418, + "step": 23470 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005864249999999999, + "loss": 0.8253, + "step": 23480 + }, + { + "epoch": 0.23, + "learning_rate": 0.0005866749999999999, + "loss": 0.7979, + "step": 23490 + }, + { + "epoch": 0.23, + "learning_rate": 0.000586925, + "loss": 0.823, + "step": 23500 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005871749999999999, + "loss": 0.839, + "step": 23510 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005874249999999999, + "loss": 0.8571, + "step": 23520 + }, + { + "epoch": 0.24, + "learning_rate": 0.000587675, + "loss": 0.8338, + "step": 23530 + }, + { + "epoch": 0.24, + "learning_rate": 0.000587925, + "loss": 0.8443, + "step": 23540 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005881749999999999, + "loss": 0.8499, + "step": 23550 + }, + { + "epoch": 0.24, + "learning_rate": 0.000588425, + "loss": 0.8371, + "step": 23560 + }, + { + "epoch": 0.24, + "learning_rate": 0.000588675, + "loss": 0.833, + "step": 23570 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005889249999999999, + "loss": 0.8266, + "step": 23580 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005891749999999999, + "loss": 0.8274, + "step": 23590 + }, + { + "epoch": 0.24, + "learning_rate": 0.000589425, + "loss": 0.8249, + "step": 23600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005896749999999999, + "loss": 0.8405, + "step": 23610 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005899249999999999, + "loss": 0.8365, + "step": 23620 + }, + { + "epoch": 0.24, + "learning_rate": 0.000590175, + "loss": 0.8367, + "step": 23630 + }, + { + "epoch": 0.24, + "learning_rate": 0.000590425, + "loss": 0.8378, + "step": 23640 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005906749999999999, + "loss": 0.8411, + "step": 23650 + }, + { + "epoch": 0.24, + "learning_rate": 0.000590925, + "loss": 0.8432, + "step": 23660 + }, + { + "epoch": 0.24, + "learning_rate": 0.000591175, + "loss": 0.8525, + "step": 23670 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005914249999999999, + "loss": 0.8394, + "step": 23680 + }, + { + "epoch": 0.24, + "learning_rate": 0.000591675, + "loss": 0.8471, + "step": 23690 + }, + { + "epoch": 0.24, + "learning_rate": 0.000591925, + "loss": 0.8306, + "step": 23700 + }, + { + "epoch": 0.24, + "learning_rate": 0.000592175, + "loss": 0.8147, + "step": 23710 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005924249999999999, + "loss": 0.8235, + "step": 23720 + }, + { + "epoch": 0.24, + "learning_rate": 0.000592675, + "loss": 0.8362, + "step": 23730 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005928999999999999, + "loss": 0.8339, + "step": 23740 + }, + { + "epoch": 0.24, + "learning_rate": 0.00059315, + "loss": 0.839, + "step": 23750 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005933999999999999, + "loss": 0.8156, + "step": 23760 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005936499999999999, + "loss": 0.8529, + "step": 23770 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005939, + "loss": 0.8476, + "step": 23780 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005941499999999999, + "loss": 0.8443, + "step": 23790 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005943999999999999, + "loss": 0.8345, + "step": 23800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00059465, + "loss": 0.8323, + "step": 23810 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005949, + "loss": 0.835, + "step": 23820 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005951499999999999, + "loss": 0.8255, + "step": 23830 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005953999999999999, + "loss": 0.8313, + "step": 23840 + }, + { + "epoch": 0.24, + "learning_rate": 0.00059565, + "loss": 0.8261, + "step": 23850 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005958999999999999, + "loss": 0.8401, + "step": 23860 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005961499999999999, + "loss": 0.8233, + "step": 23870 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005964, + "loss": 0.8306, + "step": 23880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005966499999999999, + "loss": 0.843, + "step": 23890 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005968999999999999, + "loss": 0.8355, + "step": 23900 + }, + { + "epoch": 0.24, + "learning_rate": 0.00059715, + "loss": 0.8333, + "step": 23910 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005974, + "loss": 0.8079, + "step": 23920 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005976499999999999, + "loss": 0.8175, + "step": 23930 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005979, + "loss": 0.8355, + "step": 23940 + }, + { + "epoch": 0.24, + "learning_rate": 0.00059815, + "loss": 0.8416, + "step": 23950 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005983999999999999, + "loss": 0.8322, + "step": 23960 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005986499999999999, + "loss": 0.839, + "step": 23970 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005989, + "loss": 0.8322, + "step": 23980 + }, + { + "epoch": 0.24, + "learning_rate": 0.00059915, + "loss": 0.8334, + "step": 23990 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005993999999999999, + "loss": 0.8202, + "step": 24000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00059965, + "loss": 0.8244, + "step": 24010 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005999, + "loss": 0.8304, + "step": 24020 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005999526315789473, + "loss": 0.8353, + "step": 24030 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005998736842105262, + "loss": 0.8389, + "step": 24040 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005997947368421052, + "loss": 0.8423, + "step": 24050 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005997157894736841, + "loss": 0.8378, + "step": 24060 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005996368421052631, + "loss": 0.8273, + "step": 24070 + }, + { + "epoch": 0.24, + "learning_rate": 0.000599557894736842, + "loss": 0.8392, + "step": 24080 + }, + { + "epoch": 0.24, + "learning_rate": 0.000599478947368421, + "loss": 0.8372, + "step": 24090 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005993999999999999, + "loss": 0.8344, + "step": 24100 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005993210526315789, + "loss": 0.8248, + "step": 24110 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005992421052631578, + "loss": 0.8391, + "step": 24120 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005991631578947368, + "loss": 0.8254, + "step": 24130 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005990842105263157, + "loss": 0.8272, + "step": 24140 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005990052631578947, + "loss": 0.8237, + "step": 24150 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005989263157894736, + "loss": 0.8403, + "step": 24160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005988473684210526, + "loss": 0.8436, + "step": 24170 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005987684210526315, + "loss": 0.8277, + "step": 24180 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005986894736842105, + "loss": 0.8273, + "step": 24190 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005986105263157894, + "loss": 0.8324, + "step": 24200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005985315789473684, + "loss": 0.8262, + "step": 24210 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005984526315789473, + "loss": 0.8249, + "step": 24220 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005983736842105263, + "loss": 0.8301, + "step": 24230 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005982947368421052, + "loss": 0.8368, + "step": 24240 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005982157894736842, + "loss": 0.8256, + "step": 24250 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005981368421052631, + "loss": 0.8332, + "step": 24260 + }, + { + "epoch": 0.24, + "learning_rate": 0.000598057894736842, + "loss": 0.8273, + "step": 24270 + }, + { + "epoch": 0.24, + "learning_rate": 0.000597978947368421, + "loss": 0.823, + "step": 24280 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005979, + "loss": 0.8095, + "step": 24290 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005978210526315789, + "loss": 0.8352, + "step": 24300 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005977421052631578, + "loss": 0.8351, + "step": 24310 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005976631578947368, + "loss": 0.8267, + "step": 24320 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005975842105263157, + "loss": 0.8318, + "step": 24330 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005975052631578947, + "loss": 0.8272, + "step": 24340 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005974263157894736, + "loss": 0.8129, + "step": 24350 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005973473684210526, + "loss": 0.8201, + "step": 24360 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005972684210526315, + "loss": 0.8211, + "step": 24370 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005971894736842105, + "loss": 0.8217, + "step": 24380 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005971105263157894, + "loss": 0.8114, + "step": 24390 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005970315789473684, + "loss": 0.8189, + "step": 24400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005969526315789473, + "loss": 0.8383, + "step": 24410 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005968736842105263, + "loss": 0.8187, + "step": 24420 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005967947368421052, + "loss": 0.8175, + "step": 24430 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005967157894736842, + "loss": 0.8105, + "step": 24440 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005966368421052631, + "loss": 0.8205, + "step": 24450 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005965578947368421, + "loss": 0.8291, + "step": 24460 + }, + { + "epoch": 0.24, + "learning_rate": 0.000596478947368421, + "loss": 0.8141, + "step": 24470 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005964, + "loss": 0.8309, + "step": 24480 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005963210526315789, + "loss": 0.8155, + "step": 24490 + }, + { + "epoch": 0.24, + "learning_rate": 0.0005962421052631579, + "loss": 0.8303, + "step": 24500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005961631578947368, + "loss": 0.8319, + "step": 24510 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005960842105263158, + "loss": 0.8229, + "step": 24520 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005960052631578947, + "loss": 0.8236, + "step": 24530 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005959263157894737, + "loss": 0.8219, + "step": 24540 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005958473684210526, + "loss": 0.8101, + "step": 24550 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005957684210526314, + "loss": 0.8133, + "step": 24560 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005956894736842105, + "loss": 0.8224, + "step": 24570 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005956105263157895, + "loss": 0.8266, + "step": 24580 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005955315789473684, + "loss": 0.8204, + "step": 24590 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005954526315789473, + "loss": 0.8312, + "step": 24600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005953736842105263, + "loss": 0.8187, + "step": 24610 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005952947368421052, + "loss": 0.8262, + "step": 24620 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005952157894736841, + "loss": 0.8141, + "step": 24630 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005951368421052631, + "loss": 0.8192, + "step": 24640 + }, + { + "epoch": 0.25, + "learning_rate": 0.000595057894736842, + "loss": 0.8178, + "step": 24650 + }, + { + "epoch": 0.25, + "learning_rate": 0.000594978947368421, + "loss": 0.8326, + "step": 24660 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005949, + "loss": 0.8171, + "step": 24670 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005948210526315789, + "loss": 0.8305, + "step": 24680 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005947421052631579, + "loss": 0.8214, + "step": 24690 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005946631578947367, + "loss": 0.8161, + "step": 24700 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005945842105263158, + "loss": 0.8281, + "step": 24710 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005945052631578946, + "loss": 0.8173, + "step": 24720 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005944263157894737, + "loss": 0.819, + "step": 24730 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005943473684210525, + "loss": 0.8153, + "step": 24740 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005942763157894737, + "loss": 0.8132, + "step": 24750 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005941973684210525, + "loss": 0.8105, + "step": 24760 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005941184210526316, + "loss": 0.8286, + "step": 24770 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005940394736842104, + "loss": 0.8345, + "step": 24780 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005939605263157894, + "loss": 0.8198, + "step": 24790 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005938815789473683, + "loss": 0.8296, + "step": 24800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005938026315789473, + "loss": 0.8112, + "step": 24810 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005937236842105263, + "loss": 0.8217, + "step": 24820 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005936447368421052, + "loss": 0.8239, + "step": 24830 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005935657894736842, + "loss": 0.8473, + "step": 24840 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005934868421052631, + "loss": 0.8215, + "step": 24850 + }, + { + "epoch": 0.25, + "learning_rate": 0.000593407894736842, + "loss": 0.798, + "step": 24860 + }, + { + "epoch": 0.25, + "learning_rate": 0.000593328947368421, + "loss": 0.8289, + "step": 24870 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005932499999999999, + "loss": 0.8461, + "step": 24880 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005931710526315789, + "loss": 0.8384, + "step": 24890 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005930921052631578, + "loss": 0.8234, + "step": 24900 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005930131578947369, + "loss": 0.8142, + "step": 24910 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005929342105263157, + "loss": 0.8214, + "step": 24920 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005928552631578947, + "loss": 0.8247, + "step": 24930 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005927763157894736, + "loss": 0.8213, + "step": 24940 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005926973684210526, + "loss": 0.8267, + "step": 24950 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005926184210526315, + "loss": 0.8101, + "step": 24960 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005925394736842105, + "loss": 0.8124, + "step": 24970 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005924605263157894, + "loss": 0.8121, + "step": 24980 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005923815789473684, + "loss": 0.8292, + "step": 24990 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005923026315789473, + "loss": 0.8129, + "step": 25000 + }, + { + "epoch": 0.25, + "eval_accuracy": 0.8254310925043975, + "eval_loss": 0.86767578125, + "eval_runtime": 96.7476, + "eval_samples_per_second": 826.894, + "eval_steps_per_second": 1.623, + "step": 25000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005922236842105262, + "loss": 0.8243, + "step": 25010 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005921447368421052, + "loss": 0.8351, + "step": 25020 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005920657894736841, + "loss": 0.8311, + "step": 25030 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005919868421052631, + "loss": 0.8154, + "step": 25040 + }, + { + "epoch": 0.25, + "learning_rate": 0.000591907894736842, + "loss": 0.8111, + "step": 25050 + }, + { + "epoch": 0.25, + "learning_rate": 0.000591828947368421, + "loss": 0.8188, + "step": 25060 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005917499999999999, + "loss": 0.8169, + "step": 25070 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005916710526315789, + "loss": 0.8221, + "step": 25080 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005915921052631578, + "loss": 0.8105, + "step": 25090 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005915131578947368, + "loss": 0.8269, + "step": 25100 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005914342105263157, + "loss": 0.8161, + "step": 25110 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005913552631578947, + "loss": 0.815, + "step": 25120 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005912763157894736, + "loss": 0.8187, + "step": 25130 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005911973684210526, + "loss": 0.8193, + "step": 25140 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005911184210526315, + "loss": 0.8331, + "step": 25150 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005910394736842105, + "loss": 0.818, + "step": 25160 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005909605263157894, + "loss": 0.8096, + "step": 25170 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005908815789473684, + "loss": 0.7797, + "step": 25180 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005908026315789473, + "loss": 0.8, + "step": 25190 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005907236842105263, + "loss": 0.7881, + "step": 25200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005906447368421052, + "loss": 0.7861, + "step": 25210 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005905657894736842, + "loss": 0.793, + "step": 25220 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005904868421052631, + "loss": 0.8112, + "step": 25230 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005904078947368421, + "loss": 0.8275, + "step": 25240 + }, + { + "epoch": 0.25, + "learning_rate": 0.000590328947368421, + "loss": 0.8055, + "step": 25250 + }, + { + "epoch": 0.25, + "learning_rate": 0.00059025, + "loss": 0.8217, + "step": 25260 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005901710526315789, + "loss": 0.8035, + "step": 25270 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005900921052631579, + "loss": 0.8117, + "step": 25280 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005900131578947368, + "loss": 0.8194, + "step": 25290 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005899342105263157, + "loss": 0.8228, + "step": 25300 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005898552631578947, + "loss": 0.7978, + "step": 25310 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005897763157894736, + "loss": 0.8268, + "step": 25320 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005896973684210526, + "loss": 0.807, + "step": 25330 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005896184210526315, + "loss": 0.81, + "step": 25340 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005895394736842105, + "loss": 0.8109, + "step": 25350 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005894605263157894, + "loss": 0.8241, + "step": 25360 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005893815789473684, + "loss": 0.8234, + "step": 25370 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005893026315789473, + "loss": 0.8096, + "step": 25380 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005892236842105263, + "loss": 0.8234, + "step": 25390 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005891447368421052, + "loss": 0.8193, + "step": 25400 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005890657894736842, + "loss": 0.8166, + "step": 25410 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005889868421052631, + "loss": 0.821, + "step": 25420 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005889078947368421, + "loss": 0.8112, + "step": 25430 + }, + { + "epoch": 0.25, + "learning_rate": 0.000588828947368421, + "loss": 0.8002, + "step": 25440 + }, + { + "epoch": 0.25, + "learning_rate": 0.00058875, + "loss": 0.8111, + "step": 25450 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005886710526315789, + "loss": 0.8138, + "step": 25460 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005885921052631579, + "loss": 0.8053, + "step": 25470 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005885131578947368, + "loss": 0.795, + "step": 25480 + }, + { + "epoch": 0.25, + "learning_rate": 0.0005884342105263158, + "loss": 0.8374, + "step": 25490 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005883552631578947, + "loss": 0.8137, + "step": 25500 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005882763157894737, + "loss": 0.8247, + "step": 25510 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005881973684210526, + "loss": 0.8148, + "step": 25520 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005881184210526316, + "loss": 0.8125, + "step": 25530 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005880394736842104, + "loss": 0.8205, + "step": 25540 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005879605263157895, + "loss": 0.8077, + "step": 25550 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005878815789473683, + "loss": 0.8192, + "step": 25560 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005878026315789474, + "loss": 0.8213, + "step": 25570 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005877236842105263, + "loss": 0.8145, + "step": 25580 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005876447368421052, + "loss": 0.8032, + "step": 25590 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005875657894736842, + "loss": 0.8245, + "step": 25600 + }, + { + "epoch": 0.26, + "learning_rate": 0.000587486842105263, + "loss": 0.8164, + "step": 25610 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005874078947368421, + "loss": 0.7993, + "step": 25620 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005873289473684209, + "loss": 0.8149, + "step": 25630 + }, + { + "epoch": 0.26, + "learning_rate": 0.00058725, + "loss": 0.8116, + "step": 25640 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005871710526315788, + "loss": 0.8087, + "step": 25650 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005870921052631579, + "loss": 0.8107, + "step": 25660 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005870131578947368, + "loss": 0.807, + "step": 25670 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005869342105263158, + "loss": 0.8366, + "step": 25680 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005868552631578947, + "loss": 0.8207, + "step": 25690 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005867763157894736, + "loss": 0.8152, + "step": 25700 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005866973684210526, + "loss": 0.8138, + "step": 25710 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005866184210526315, + "loss": 0.819, + "step": 25720 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005865394736842105, + "loss": 0.8066, + "step": 25730 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005864605263157894, + "loss": 0.8157, + "step": 25740 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005863894736842105, + "loss": 0.8029, + "step": 25750 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005863105263157894, + "loss": 0.8109, + "step": 25760 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005862315789473683, + "loss": 0.825, + "step": 25770 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005861526315789473, + "loss": 0.8045, + "step": 25780 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005860736842105262, + "loss": 0.8127, + "step": 25790 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005859947368421052, + "loss": 0.8105, + "step": 25800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005859157894736841, + "loss": 0.8017, + "step": 25810 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005858368421052632, + "loss": 0.8192, + "step": 25820 + }, + { + "epoch": 0.26, + "learning_rate": 0.000585757894736842, + "loss": 0.8236, + "step": 25830 + }, + { + "epoch": 0.26, + "learning_rate": 0.000585678947368421, + "loss": 0.8142, + "step": 25840 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005855999999999999, + "loss": 0.8061, + "step": 25850 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005855210526315789, + "loss": 0.8054, + "step": 25860 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005854421052631578, + "loss": 0.8058, + "step": 25870 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005853631578947368, + "loss": 0.8033, + "step": 25880 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005852842105263157, + "loss": 0.8201, + "step": 25890 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005852052631578947, + "loss": 0.8065, + "step": 25900 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005851263157894736, + "loss": 0.8104, + "step": 25910 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005850473684210526, + "loss": 0.7849, + "step": 25920 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005849684210526315, + "loss": 0.7978, + "step": 25930 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005848894736842105, + "loss": 0.812, + "step": 25940 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005848105263157894, + "loss": 0.8091, + "step": 25950 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005847315789473684, + "loss": 0.8145, + "step": 25960 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005846526315789473, + "loss": 0.8113, + "step": 25970 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005845736842105263, + "loss": 0.8041, + "step": 25980 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005844947368421052, + "loss": 0.8074, + "step": 25990 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005844157894736841, + "loss": 0.805, + "step": 26000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005843368421052631, + "loss": 0.8021, + "step": 26010 + }, + { + "epoch": 0.26, + "learning_rate": 0.000584257894736842, + "loss": 0.8067, + "step": 26020 + }, + { + "epoch": 0.26, + "learning_rate": 0.000584178947368421, + "loss": 0.8017, + "step": 26030 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005840999999999999, + "loss": 0.8088, + "step": 26040 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005840210526315789, + "loss": 0.8108, + "step": 26050 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005839421052631578, + "loss": 0.7941, + "step": 26060 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005838631578947368, + "loss": 0.8195, + "step": 26070 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005837842105263157, + "loss": 0.8182, + "step": 26080 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005837052631578947, + "loss": 0.8138, + "step": 26090 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005836263157894736, + "loss": 0.8126, + "step": 26100 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005835473684210526, + "loss": 0.817, + "step": 26110 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005834684210526315, + "loss": 0.81, + "step": 26120 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005833894736842105, + "loss": 0.7905, + "step": 26130 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005833105263157894, + "loss": 0.8001, + "step": 26140 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005832315789473684, + "loss": 0.8134, + "step": 26150 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005831526315789473, + "loss": 0.8116, + "step": 26160 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005830736842105263, + "loss": 0.7971, + "step": 26170 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005829947368421052, + "loss": 0.795, + "step": 26180 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005829157894736842, + "loss": 0.8128, + "step": 26190 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005828368421052631, + "loss": 0.8144, + "step": 26200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005827578947368421, + "loss": 0.8022, + "step": 26210 + }, + { + "epoch": 0.26, + "learning_rate": 0.000582678947368421, + "loss": 0.8091, + "step": 26220 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005826, + "loss": 0.7954, + "step": 26230 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005825210526315789, + "loss": 0.797, + "step": 26240 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005824421052631579, + "loss": 0.8183, + "step": 26250 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005823631578947368, + "loss": 0.8078, + "step": 26260 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005822842105263158, + "loss": 0.8063, + "step": 26270 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005822052631578947, + "loss": 0.7925, + "step": 26280 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005821263157894736, + "loss": 0.7894, + "step": 26290 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005820473684210526, + "loss": 0.7882, + "step": 26300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005819684210526315, + "loss": 0.7932, + "step": 26310 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005818894736842105, + "loss": 0.7961, + "step": 26320 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005818105263157894, + "loss": 0.7923, + "step": 26330 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005817315789473684, + "loss": 0.7974, + "step": 26340 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005816526315789473, + "loss": 0.798, + "step": 26350 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005815736842105263, + "loss": 0.792, + "step": 26360 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005814947368421052, + "loss": 0.8103, + "step": 26370 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005814157894736842, + "loss": 0.8009, + "step": 26380 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005813368421052631, + "loss": 0.8062, + "step": 26390 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005812578947368421, + "loss": 0.8107, + "step": 26400 + }, + { + "epoch": 0.26, + "learning_rate": 0.000581178947368421, + "loss": 0.814, + "step": 26410 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005811, + "loss": 0.8003, + "step": 26420 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005810210526315789, + "loss": 0.7893, + "step": 26430 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005809421052631579, + "loss": 0.7695, + "step": 26440 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005808631578947367, + "loss": 0.7856, + "step": 26450 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005807842105263158, + "loss": 0.7692, + "step": 26460 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005807052631578947, + "loss": 0.7626, + "step": 26470 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005806263157894737, + "loss": 0.7926, + "step": 26480 + }, + { + "epoch": 0.26, + "learning_rate": 0.0005805473684210526, + "loss": 0.8006, + "step": 26490 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005804684210526316, + "loss": 0.802, + "step": 26500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005803894736842105, + "loss": 0.8092, + "step": 26510 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005803105263157894, + "loss": 0.8018, + "step": 26520 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005802315789473684, + "loss": 0.7875, + "step": 26530 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005801526315789472, + "loss": 0.8163, + "step": 26540 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005800736842105263, + "loss": 0.7923, + "step": 26550 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005799947368421053, + "loss": 0.8019, + "step": 26560 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005799157894736842, + "loss": 0.8018, + "step": 26570 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005798368421052631, + "loss": 0.7946, + "step": 26580 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005797578947368421, + "loss": 0.7982, + "step": 26590 + }, + { + "epoch": 0.27, + "learning_rate": 0.000579678947368421, + "loss": 0.7962, + "step": 26600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005795999999999999, + "loss": 0.797, + "step": 26610 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005795210526315789, + "loss": 0.7993, + "step": 26620 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005794421052631578, + "loss": 0.7906, + "step": 26630 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005793631578947368, + "loss": 0.8111, + "step": 26640 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005792842105263157, + "loss": 0.8141, + "step": 26650 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005792052631578947, + "loss": 0.8079, + "step": 26660 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005791263157894737, + "loss": 0.7938, + "step": 26670 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005790473684210525, + "loss": 0.8108, + "step": 26680 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005789684210526316, + "loss": 0.8085, + "step": 26690 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005788894736842104, + "loss": 0.7985, + "step": 26700 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005788105263157895, + "loss": 0.7647, + "step": 26710 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005787315789473683, + "loss": 0.8048, + "step": 26720 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005786526315789474, + "loss": 0.7926, + "step": 26730 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005785736842105262, + "loss": 0.8004, + "step": 26740 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005785026315789473, + "loss": 0.7965, + "step": 26750 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005784236842105262, + "loss": 0.8007, + "step": 26760 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005783447368421052, + "loss": 0.7922, + "step": 26770 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005782657894736841, + "loss": 0.8026, + "step": 26780 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005781868421052631, + "loss": 0.8007, + "step": 26790 + }, + { + "epoch": 0.27, + "learning_rate": 0.000578107894736842, + "loss": 0.7939, + "step": 26800 + }, + { + "epoch": 0.27, + "learning_rate": 0.000578028947368421, + "loss": 0.795, + "step": 26810 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005779499999999999, + "loss": 0.7852, + "step": 26820 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005778710526315789, + "loss": 0.7874, + "step": 26830 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005777921052631578, + "loss": 0.7885, + "step": 26840 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005777131578947368, + "loss": 0.7963, + "step": 26850 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005776342105263157, + "loss": 0.795, + "step": 26860 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005775552631578947, + "loss": 0.8115, + "step": 26870 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005774763157894736, + "loss": 0.7849, + "step": 26880 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005773973684210526, + "loss": 0.7789, + "step": 26890 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005773184210526315, + "loss": 0.8073, + "step": 26900 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005772394736842105, + "loss": 0.7947, + "step": 26910 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005771605263157894, + "loss": 0.7839, + "step": 26920 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005770815789473684, + "loss": 0.8056, + "step": 26930 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005770026315789473, + "loss": 0.7972, + "step": 26940 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005769236842105263, + "loss": 0.8008, + "step": 26950 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005768447368421052, + "loss": 0.7911, + "step": 26960 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005767657894736842, + "loss": 0.8002, + "step": 26970 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005766868421052631, + "loss": 0.7812, + "step": 26980 + }, + { + "epoch": 0.27, + "learning_rate": 0.000576607894736842, + "loss": 0.7864, + "step": 26990 + }, + { + "epoch": 0.27, + "learning_rate": 0.000576528947368421, + "loss": 0.7809, + "step": 27000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005764499999999999, + "loss": 0.8083, + "step": 27010 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005763710526315789, + "loss": 0.7935, + "step": 27020 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005762921052631578, + "loss": 0.79, + "step": 27030 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005762131578947368, + "loss": 0.7868, + "step": 27040 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005761342105263157, + "loss": 0.7978, + "step": 27050 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005760552631578947, + "loss": 0.7975, + "step": 27060 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005759763157894736, + "loss": 0.7971, + "step": 27070 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005758973684210526, + "loss": 0.7861, + "step": 27080 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005758184210526315, + "loss": 0.7807, + "step": 27090 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005757394736842105, + "loss": 0.7933, + "step": 27100 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005756605263157894, + "loss": 0.8048, + "step": 27110 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005755815789473684, + "loss": 0.7791, + "step": 27120 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005755026315789473, + "loss": 0.7752, + "step": 27130 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005754236842105263, + "loss": 0.7753, + "step": 27140 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005753447368421052, + "loss": 0.7925, + "step": 27150 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005752657894736842, + "loss": 0.7894, + "step": 27160 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005751868421052631, + "loss": 0.7974, + "step": 27170 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005751078947368421, + "loss": 0.7849, + "step": 27180 + }, + { + "epoch": 0.27, + "learning_rate": 0.000575028947368421, + "loss": 0.8053, + "step": 27190 + }, + { + "epoch": 0.27, + "learning_rate": 0.00057495, + "loss": 0.8107, + "step": 27200 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005748710526315789, + "loss": 0.7912, + "step": 27210 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005747921052631579, + "loss": 0.7863, + "step": 27220 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005747131578947368, + "loss": 0.7913, + "step": 27230 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005746342105263158, + "loss": 0.7951, + "step": 27240 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005745552631578947, + "loss": 0.7872, + "step": 27250 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005744763157894737, + "loss": 0.7988, + "step": 27260 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005743973684210526, + "loss": 0.8092, + "step": 27270 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005743184210526315, + "loss": 0.7866, + "step": 27280 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005742394736842105, + "loss": 0.7957, + "step": 27290 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005741605263157894, + "loss": 0.7757, + "step": 27300 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005740815789473684, + "loss": 0.792, + "step": 27310 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005740026315789473, + "loss": 0.7942, + "step": 27320 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005739236842105263, + "loss": 0.7846, + "step": 27330 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005738447368421052, + "loss": 0.7727, + "step": 27340 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005737657894736842, + "loss": 0.7893, + "step": 27350 + }, + { + "epoch": 0.27, + "learning_rate": 0.000573686842105263, + "loss": 0.807, + "step": 27360 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005736078947368421, + "loss": 0.8077, + "step": 27370 + }, + { + "epoch": 0.27, + "learning_rate": 0.000573528947368421, + "loss": 0.7967, + "step": 27380 + }, + { + "epoch": 0.27, + "learning_rate": 0.00057345, + "loss": 0.7939, + "step": 27390 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005733710526315789, + "loss": 0.7984, + "step": 27400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005732921052631579, + "loss": 0.7908, + "step": 27410 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005732131578947368, + "loss": 0.7929, + "step": 27420 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005731342105263157, + "loss": 0.7869, + "step": 27430 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005730552631578947, + "loss": 0.7889, + "step": 27440 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005729763157894736, + "loss": 0.7958, + "step": 27450 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005728973684210526, + "loss": 0.7759, + "step": 27460 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005728184210526316, + "loss": 0.7919, + "step": 27470 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005727394736842105, + "loss": 0.7901, + "step": 27480 + }, + { + "epoch": 0.27, + "learning_rate": 0.0005726605263157895, + "loss": 0.7976, + "step": 27490 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005725815789473684, + "loss": 0.7826, + "step": 27500 + }, + { + "epoch": 0.28, + "eval_accuracy": 0.832012190127962, + "eval_loss": 0.82568359375, + "eval_runtime": 96.9614, + "eval_samples_per_second": 825.071, + "eval_steps_per_second": 1.619, + "step": 27500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005725026315789474, + "loss": 0.7844, + "step": 27510 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005724236842105262, + "loss": 0.7746, + "step": 27520 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005723447368421053, + "loss": 0.7749, + "step": 27530 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005722657894736841, + "loss": 0.7559, + "step": 27540 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005721868421052632, + "loss": 0.7876, + "step": 27550 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005721078947368421, + "loss": 0.7792, + "step": 27560 + }, + { + "epoch": 0.28, + "learning_rate": 0.000572028947368421, + "loss": 0.7734, + "step": 27570 + }, + { + "epoch": 0.28, + "learning_rate": 0.00057195, + "loss": 0.7701, + "step": 27580 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005718710526315788, + "loss": 0.7835, + "step": 27590 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005717921052631579, + "loss": 0.7688, + "step": 27600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005717131578947367, + "loss": 0.8105, + "step": 27610 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005716342105263158, + "loss": 0.8011, + "step": 27620 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005715552631578946, + "loss": 0.8044, + "step": 27630 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005714763157894737, + "loss": 0.8107, + "step": 27640 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005713973684210525, + "loss": 0.8012, + "step": 27650 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005713184210526315, + "loss": 0.796, + "step": 27660 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005712394736842105, + "loss": 0.7865, + "step": 27670 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005711605263157894, + "loss": 0.7908, + "step": 27680 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005710815789473684, + "loss": 0.8021, + "step": 27690 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005710026315789473, + "loss": 0.7944, + "step": 27700 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005709236842105263, + "loss": 0.7863, + "step": 27710 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005708447368421052, + "loss": 0.7902, + "step": 27720 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005707657894736841, + "loss": 0.7787, + "step": 27730 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005706868421052631, + "loss": 0.7952, + "step": 27740 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005706157894736841, + "loss": 0.7887, + "step": 27750 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005705368421052631, + "loss": 0.7949, + "step": 27760 + }, + { + "epoch": 0.28, + "learning_rate": 0.000570457894736842, + "loss": 0.7929, + "step": 27770 + }, + { + "epoch": 0.28, + "learning_rate": 0.000570378947368421, + "loss": 0.802, + "step": 27780 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005702999999999999, + "loss": 0.7668, + "step": 27790 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005702210526315789, + "loss": 0.7895, + "step": 27800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005701421052631578, + "loss": 0.7788, + "step": 27810 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005700631578947368, + "loss": 0.7903, + "step": 27820 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005699842105263157, + "loss": 0.7973, + "step": 27830 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005699052631578947, + "loss": 0.8102, + "step": 27840 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005698263157894736, + "loss": 0.7954, + "step": 27850 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005697473684210526, + "loss": 0.7891, + "step": 27860 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005696684210526315, + "loss": 0.8007, + "step": 27870 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005695894736842105, + "loss": 0.8003, + "step": 27880 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005695105263157894, + "loss": 0.7952, + "step": 27890 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005694315789473684, + "loss": 0.807, + "step": 27900 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005693526315789473, + "loss": 0.7858, + "step": 27910 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005692736842105263, + "loss": 0.7877, + "step": 27920 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005691947368421052, + "loss": 0.7896, + "step": 27930 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005691157894736842, + "loss": 0.7885, + "step": 27940 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005690368421052631, + "loss": 0.7758, + "step": 27950 + }, + { + "epoch": 0.28, + "learning_rate": 0.000568957894736842, + "loss": 0.8041, + "step": 27960 + }, + { + "epoch": 0.28, + "learning_rate": 0.000568878947368421, + "loss": 0.7975, + "step": 27970 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005688, + "loss": 0.789, + "step": 27980 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005687210526315789, + "loss": 0.7913, + "step": 27990 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005686421052631578, + "loss": 0.8035, + "step": 28000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005685631578947368, + "loss": 0.7875, + "step": 28010 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005684842105263157, + "loss": 0.7854, + "step": 28020 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005684052631578947, + "loss": 0.7866, + "step": 28030 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005683263157894736, + "loss": 0.7918, + "step": 28040 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005682473684210526, + "loss": 0.7825, + "step": 28050 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005681684210526315, + "loss": 0.7777, + "step": 28060 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005680894736842105, + "loss": 0.7947, + "step": 28070 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005680105263157894, + "loss": 0.7992, + "step": 28080 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005679315789473684, + "loss": 0.7978, + "step": 28090 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005678526315789473, + "loss": 0.8152, + "step": 28100 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005677736842105263, + "loss": 0.7904, + "step": 28110 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005676947368421052, + "loss": 0.8009, + "step": 28120 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005676157894736842, + "loss": 0.7671, + "step": 28130 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005675368421052631, + "loss": 0.7616, + "step": 28140 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005674578947368421, + "loss": 0.7662, + "step": 28150 + }, + { + "epoch": 0.28, + "learning_rate": 0.000567378947368421, + "loss": 0.7741, + "step": 28160 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005673, + "loss": 0.7594, + "step": 28170 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005672210526315789, + "loss": 0.7638, + "step": 28180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005671421052631579, + "loss": 0.7669, + "step": 28190 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005670631578947368, + "loss": 0.7689, + "step": 28200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005669842105263158, + "loss": 0.7642, + "step": 28210 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005669052631578947, + "loss": 0.7709, + "step": 28220 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005668263157894737, + "loss": 0.7563, + "step": 28230 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005667473684210526, + "loss": 0.7715, + "step": 28240 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005666684210526316, + "loss": 0.7512, + "step": 28250 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005665894736842105, + "loss": 0.7723, + "step": 28260 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005665105263157893, + "loss": 0.7626, + "step": 28270 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005664315789473684, + "loss": 0.7751, + "step": 28280 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005663526315789473, + "loss": 0.7474, + "step": 28290 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005662736842105263, + "loss": 0.7722, + "step": 28300 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005661947368421052, + "loss": 0.7533, + "step": 28310 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005661157894736842, + "loss": 0.7673, + "step": 28320 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005660368421052631, + "loss": 0.7517, + "step": 28330 + }, + { + "epoch": 0.28, + "learning_rate": 0.000565957894736842, + "loss": 0.7431, + "step": 28340 + }, + { + "epoch": 0.28, + "learning_rate": 0.000565878947368421, + "loss": 0.7501, + "step": 28350 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005657999999999999, + "loss": 0.7459, + "step": 28360 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005657210526315789, + "loss": 0.7557, + "step": 28370 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005656421052631579, + "loss": 0.7627, + "step": 28380 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005655631578947368, + "loss": 0.7589, + "step": 28390 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005654842105263158, + "loss": 0.7543, + "step": 28400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005654052631578947, + "loss": 0.7585, + "step": 28410 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005653263157894737, + "loss": 0.7612, + "step": 28420 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005652473684210525, + "loss": 0.7489, + "step": 28430 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005651684210526316, + "loss": 0.7545, + "step": 28440 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005650894736842104, + "loss": 0.7444, + "step": 28450 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005650105263157895, + "loss": 0.751, + "step": 28460 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005649315789473684, + "loss": 0.7676, + "step": 28470 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005648526315789474, + "loss": 0.7867, + "step": 28480 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005647736842105263, + "loss": 0.7744, + "step": 28490 + }, + { + "epoch": 0.28, + "learning_rate": 0.0005646947368421052, + "loss": 0.7803, + "step": 28500 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005646157894736842, + "loss": 0.7884, + "step": 28510 + }, + { + "epoch": 0.29, + "learning_rate": 0.000564536842105263, + "loss": 0.7509, + "step": 28520 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005644578947368421, + "loss": 0.7463, + "step": 28530 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005643789473684209, + "loss": 0.7696, + "step": 28540 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005643, + "loss": 0.7679, + "step": 28550 + }, + { + "epoch": 0.29, + "learning_rate": 0.000564221052631579, + "loss": 0.7665, + "step": 28560 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005641421052631578, + "loss": 0.7845, + "step": 28570 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005640631578947368, + "loss": 0.7717, + "step": 28580 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005639842105263157, + "loss": 0.7749, + "step": 28590 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005639052631578947, + "loss": 0.7643, + "step": 28600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005638263157894736, + "loss": 0.7721, + "step": 28610 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005637473684210526, + "loss": 0.7806, + "step": 28620 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005636684210526315, + "loss": 0.7703, + "step": 28630 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005635894736842104, + "loss": 0.775, + "step": 28640 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005635105263157895, + "loss": 0.7549, + "step": 28650 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005634315789473683, + "loss": 0.7824, + "step": 28660 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005633526315789474, + "loss": 0.777, + "step": 28670 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005632736842105262, + "loss": 0.7699, + "step": 28680 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005631947368421053, + "loss": 0.7834, + "step": 28690 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005631157894736841, + "loss": 0.7771, + "step": 28700 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005630368421052631, + "loss": 0.776, + "step": 28710 + }, + { + "epoch": 0.29, + "learning_rate": 0.000562957894736842, + "loss": 0.7718, + "step": 28720 + }, + { + "epoch": 0.29, + "learning_rate": 0.000562878947368421, + "loss": 0.7746, + "step": 28730 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005627999999999999, + "loss": 0.7664, + "step": 28740 + }, + { + "epoch": 0.29, + "learning_rate": 0.000562728947368421, + "loss": 0.7783, + "step": 28750 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005626499999999999, + "loss": 0.7738, + "step": 28760 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005625710526315789, + "loss": 0.7636, + "step": 28770 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005624921052631578, + "loss": 0.7785, + "step": 28780 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005624131578947368, + "loss": 0.7855, + "step": 28790 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005623342105263157, + "loss": 0.7823, + "step": 28800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005622552631578947, + "loss": 0.7698, + "step": 28810 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005621763157894736, + "loss": 0.7731, + "step": 28820 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005620973684210526, + "loss": 0.7695, + "step": 28830 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005620184210526315, + "loss": 0.7962, + "step": 28840 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005619394736842105, + "loss": 0.7916, + "step": 28850 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005618605263157894, + "loss": 0.7721, + "step": 28860 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005617815789473684, + "loss": 0.7727, + "step": 28870 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005617026315789473, + "loss": 0.7596, + "step": 28880 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005616236842105263, + "loss": 0.779, + "step": 28890 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005615447368421052, + "loss": 0.7699, + "step": 28900 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005614657894736842, + "loss": 0.7563, + "step": 28910 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005613868421052631, + "loss": 0.7691, + "step": 28920 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005613078947368421, + "loss": 0.762, + "step": 28930 + }, + { + "epoch": 0.29, + "learning_rate": 0.000561228947368421, + "loss": 0.758, + "step": 28940 + }, + { + "epoch": 0.29, + "learning_rate": 0.00056115, + "loss": 0.7634, + "step": 28950 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005610710526315789, + "loss": 0.7748, + "step": 28960 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005609921052631578, + "loss": 0.7597, + "step": 28970 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005609131578947368, + "loss": 0.7689, + "step": 28980 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005608342105263157, + "loss": 0.7684, + "step": 28990 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005607552631578947, + "loss": 0.7795, + "step": 29000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005606763157894736, + "loss": 0.7696, + "step": 29010 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005605973684210526, + "loss": 0.7731, + "step": 29020 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005605184210526315, + "loss": 0.7873, + "step": 29030 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005604394736842105, + "loss": 0.7711, + "step": 29040 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005603605263157894, + "loss": 0.7722, + "step": 29050 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005602815789473684, + "loss": 0.7827, + "step": 29060 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005602026315789473, + "loss": 0.7647, + "step": 29070 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005601236842105263, + "loss": 0.7693, + "step": 29080 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005600447368421052, + "loss": 0.7714, + "step": 29090 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005599657894736842, + "loss": 0.777, + "step": 29100 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005598868421052631, + "loss": 0.7623, + "step": 29110 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005598078947368421, + "loss": 0.7806, + "step": 29120 + }, + { + "epoch": 0.29, + "learning_rate": 0.000559728947368421, + "loss": 0.7622, + "step": 29130 + }, + { + "epoch": 0.29, + "learning_rate": 0.00055965, + "loss": 0.7731, + "step": 29140 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005595710526315789, + "loss": 0.7647, + "step": 29150 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005594921052631579, + "loss": 0.787, + "step": 29160 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005594131578947368, + "loss": 0.7716, + "step": 29170 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005593342105263158, + "loss": 0.7641, + "step": 29180 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005592552631578947, + "loss": 0.7624, + "step": 29190 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005591763157894737, + "loss": 0.7725, + "step": 29200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005590973684210526, + "loss": 0.7749, + "step": 29210 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005590184210526316, + "loss": 0.7672, + "step": 29220 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005589394736842105, + "loss": 0.7671, + "step": 29230 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005588605263157895, + "loss": 0.7642, + "step": 29240 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005587815789473684, + "loss": 0.7693, + "step": 29250 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005587026315789473, + "loss": 0.7706, + "step": 29260 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005586236842105262, + "loss": 0.7733, + "step": 29270 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005585447368421052, + "loss": 0.7648, + "step": 29280 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005584657894736842, + "loss": 0.7538, + "step": 29290 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005583868421052631, + "loss": 0.7698, + "step": 29300 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005583078947368421, + "loss": 0.7615, + "step": 29310 + }, + { + "epoch": 0.29, + "learning_rate": 0.000558228947368421, + "loss": 0.7724, + "step": 29320 + }, + { + "epoch": 0.29, + "learning_rate": 0.00055815, + "loss": 0.745, + "step": 29330 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005580710526315788, + "loss": 0.7809, + "step": 29340 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005579921052631579, + "loss": 0.7832, + "step": 29350 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005579131578947367, + "loss": 0.7819, + "step": 29360 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005578342105263158, + "loss": 0.7809, + "step": 29370 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005577552631578947, + "loss": 0.7701, + "step": 29380 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005576763157894737, + "loss": 0.7612, + "step": 29390 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005575973684210526, + "loss": 0.7629, + "step": 29400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005575184210526315, + "loss": 0.7665, + "step": 29410 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005574394736842105, + "loss": 0.7607, + "step": 29420 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005573605263157894, + "loss": 0.7558, + "step": 29430 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005572815789473684, + "loss": 0.7546, + "step": 29440 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005572026315789473, + "loss": 0.7674, + "step": 29450 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005571236842105263, + "loss": 0.7651, + "step": 29460 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005570447368421053, + "loss": 0.7576, + "step": 29470 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005569657894736841, + "loss": 0.7815, + "step": 29480 + }, + { + "epoch": 0.29, + "learning_rate": 0.0005568868421052632, + "loss": 0.7652, + "step": 29490 + }, + { + "epoch": 0.29, + "learning_rate": 0.000556807894736842, + "loss": 0.7596, + "step": 29500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005567289473684211, + "loss": 0.7656, + "step": 29510 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005566499999999999, + "loss": 0.7722, + "step": 29520 + }, + { + "epoch": 0.3, + "learning_rate": 0.000556571052631579, + "loss": 0.7622, + "step": 29530 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005564921052631578, + "loss": 0.783, + "step": 29540 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005564131578947367, + "loss": 0.756, + "step": 29550 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005563342105263158, + "loss": 0.7565, + "step": 29560 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005562552631578946, + "loss": 0.7852, + "step": 29570 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005561763157894737, + "loss": 0.764, + "step": 29580 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005560973684210525, + "loss": 0.7621, + "step": 29590 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005560184210526316, + "loss": 0.773, + "step": 29600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005559394736842104, + "loss": 0.7667, + "step": 29610 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005558605263157895, + "loss": 0.7601, + "step": 29620 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005557815789473683, + "loss": 0.7701, + "step": 29630 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005557026315789473, + "loss": 0.7738, + "step": 29640 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005556236842105263, + "loss": 0.7708, + "step": 29650 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005555447368421052, + "loss": 0.7586, + "step": 29660 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005554657894736842, + "loss": 0.7645, + "step": 29670 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005553868421052631, + "loss": 0.7619, + "step": 29680 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005553078947368421, + "loss": 0.7617, + "step": 29690 + }, + { + "epoch": 0.3, + "learning_rate": 0.000555228947368421, + "loss": 0.7491, + "step": 29700 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005551499999999999, + "loss": 0.767, + "step": 29710 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005550710526315789, + "loss": 0.7461, + "step": 29720 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005549921052631578, + "loss": 0.7609, + "step": 29730 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005549131578947369, + "loss": 0.748, + "step": 29740 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005548342105263157, + "loss": 0.7701, + "step": 29750 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005547631578947368, + "loss": 0.7538, + "step": 29760 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005546842105263157, + "loss": 0.7573, + "step": 29770 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005546052631578947, + "loss": 0.7668, + "step": 29780 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005545263157894736, + "loss": 0.7655, + "step": 29790 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005544473684210526, + "loss": 0.7486, + "step": 29800 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005543684210526315, + "loss": 0.7578, + "step": 29810 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005542894736842105, + "loss": 0.7572, + "step": 29820 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005542105263157894, + "loss": 0.7479, + "step": 29830 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005541315789473684, + "loss": 0.7673, + "step": 29840 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005540526315789473, + "loss": 0.7573, + "step": 29850 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005539736842105263, + "loss": 0.7462, + "step": 29860 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005538947368421052, + "loss": 0.7546, + "step": 29870 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005538157894736842, + "loss": 0.7494, + "step": 29880 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005537368421052631, + "loss": 0.7548, + "step": 29890 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005536578947368421, + "loss": 0.7572, + "step": 29900 + }, + { + "epoch": 0.3, + "learning_rate": 0.000553578947368421, + "loss": 0.7564, + "step": 29910 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005535, + "loss": 0.7634, + "step": 29920 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005534210526315789, + "loss": 0.7591, + "step": 29930 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005533421052631579, + "loss": 0.7549, + "step": 29940 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005532631578947368, + "loss": 0.7569, + "step": 29950 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005531842105263157, + "loss": 0.7466, + "step": 29960 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005531052631578947, + "loss": 0.7431, + "step": 29970 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005530263157894736, + "loss": 0.7572, + "step": 29980 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005529473684210526, + "loss": 0.7676, + "step": 29990 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005528684210526315, + "loss": 0.7634, + "step": 30000 + }, + { + "epoch": 0.3, + "eval_accuracy": 0.8433683834141076, + "eval_loss": 0.76123046875, + "eval_runtime": 96.8175, + "eval_samples_per_second": 826.297, + "eval_steps_per_second": 1.622, + "step": 30000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005527894736842105, + "loss": 0.74, + "step": 30010 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005527105263157894, + "loss": 0.762, + "step": 30020 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005526315789473684, + "loss": 0.7723, + "step": 30030 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005525526315789473, + "loss": 0.7572, + "step": 30040 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005524736842105263, + "loss": 0.7527, + "step": 30050 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005523947368421052, + "loss": 0.7593, + "step": 30060 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005523157894736842, + "loss": 0.7476, + "step": 30070 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005522368421052631, + "loss": 0.7554, + "step": 30080 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005521578947368421, + "loss": 0.7492, + "step": 30090 + }, + { + "epoch": 0.3, + "learning_rate": 0.000552078947368421, + "loss": 0.7597, + "step": 30100 + }, + { + "epoch": 0.3, + "learning_rate": 0.000552, + "loss": 0.7607, + "step": 30110 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005519210526315789, + "loss": 0.7455, + "step": 30120 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005518421052631579, + "loss": 0.7545, + "step": 30130 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005517631578947368, + "loss": 0.7592, + "step": 30140 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005516842105263158, + "loss": 0.7563, + "step": 30150 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005516052631578947, + "loss": 0.7479, + "step": 30160 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005515263157894737, + "loss": 0.7569, + "step": 30170 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005514473684210526, + "loss": 0.7524, + "step": 30180 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005513684210526316, + "loss": 0.7568, + "step": 30190 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005512894736842105, + "loss": 0.7635, + "step": 30200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005512105263157895, + "loss": 0.7526, + "step": 30210 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005511315789473684, + "loss": 0.7605, + "step": 30220 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005510526315789474, + "loss": 0.7517, + "step": 30230 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005509736842105263, + "loss": 0.7414, + "step": 30240 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005508947368421051, + "loss": 0.7578, + "step": 30250 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005508157894736842, + "loss": 0.7417, + "step": 30260 + }, + { + "epoch": 0.3, + "learning_rate": 0.000550736842105263, + "loss": 0.7437, + "step": 30270 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005506578947368421, + "loss": 0.7469, + "step": 30280 + }, + { + "epoch": 0.3, + "learning_rate": 0.000550578947368421, + "loss": 0.7589, + "step": 30290 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005505, + "loss": 0.7563, + "step": 30300 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005504210526315789, + "loss": 0.7491, + "step": 30310 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005503421052631578, + "loss": 0.7615, + "step": 30320 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005502631578947368, + "loss": 0.7601, + "step": 30330 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005501842105263157, + "loss": 0.7411, + "step": 30340 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005501052631578947, + "loss": 0.7549, + "step": 30350 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005500263157894736, + "loss": 0.7653, + "step": 30360 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005499473684210526, + "loss": 0.7534, + "step": 30370 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005498684210526316, + "loss": 0.754, + "step": 30380 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005497894736842104, + "loss": 0.7536, + "step": 30390 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005497105263157895, + "loss": 0.7557, + "step": 30400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005496315789473683, + "loss": 0.7542, + "step": 30410 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005495526315789474, + "loss": 0.761, + "step": 30420 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005494736842105262, + "loss": 0.7696, + "step": 30430 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005493947368421053, + "loss": 0.752, + "step": 30440 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005493157894736841, + "loss": 0.7548, + "step": 30450 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005492368421052631, + "loss": 0.7614, + "step": 30460 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005491578947368421, + "loss": 0.7621, + "step": 30470 + }, + { + "epoch": 0.3, + "learning_rate": 0.000549078947368421, + "loss": 0.7664, + "step": 30480 + }, + { + "epoch": 0.3, + "learning_rate": 0.000549, + "loss": 0.7525, + "step": 30490 + }, + { + "epoch": 0.3, + "learning_rate": 0.0005489210526315788, + "loss": 0.7453, + "step": 30500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005488421052631579, + "loss": 0.7522, + "step": 30510 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005487631578947367, + "loss": 0.7564, + "step": 30520 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005486842105263158, + "loss": 0.772, + "step": 30530 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005486052631578946, + "loss": 0.7477, + "step": 30540 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005485263157894736, + "loss": 0.7515, + "step": 30550 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005484473684210526, + "loss": 0.7445, + "step": 30560 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005483684210526315, + "loss": 0.7702, + "step": 30570 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005482894736842105, + "loss": 0.758, + "step": 30580 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005482105263157894, + "loss": 0.7574, + "step": 30590 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005481315789473684, + "loss": 0.7539, + "step": 30600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005480526315789473, + "loss": 0.7551, + "step": 30610 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005479736842105262, + "loss": 0.7581, + "step": 30620 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005478947368421052, + "loss": 0.7581, + "step": 30630 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005478157894736841, + "loss": 0.7569, + "step": 30640 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005477368421052632, + "loss": 0.7576, + "step": 30650 + }, + { + "epoch": 0.31, + "learning_rate": 0.000547657894736842, + "loss": 0.7551, + "step": 30660 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005475789473684211, + "loss": 0.7675, + "step": 30670 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005474999999999999, + "loss": 0.7667, + "step": 30680 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005474210526315789, + "loss": 0.7623, + "step": 30690 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005473421052631578, + "loss": 0.7515, + "step": 30700 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005472631578947368, + "loss": 0.7656, + "step": 30710 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005471842105263157, + "loss": 0.7542, + "step": 30720 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005471052631578947, + "loss": 0.7649, + "step": 30730 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005470263157894737, + "loss": 0.7544, + "step": 30740 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005469473684210526, + "loss": 0.7633, + "step": 30750 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005468763157894736, + "loss": 0.7596, + "step": 30760 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005467973684210526, + "loss": 0.7561, + "step": 30770 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005467184210526315, + "loss": 0.7612, + "step": 30780 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005466394736842105, + "loss": 0.7541, + "step": 30790 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005465605263157894, + "loss": 0.7617, + "step": 30800 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005464815789473684, + "loss": 0.749, + "step": 30810 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005464026315789473, + "loss": 0.7634, + "step": 30820 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005463236842105263, + "loss": 0.7516, + "step": 30830 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005462447368421052, + "loss": 0.7469, + "step": 30840 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005461657894736842, + "loss": 0.7483, + "step": 30850 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005460868421052631, + "loss": 0.7579, + "step": 30860 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005460078947368421, + "loss": 0.7577, + "step": 30870 + }, + { + "epoch": 0.31, + "learning_rate": 0.000545928947368421, + "loss": 0.7598, + "step": 30880 + }, + { + "epoch": 0.31, + "learning_rate": 0.00054585, + "loss": 0.7574, + "step": 30890 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005457710526315789, + "loss": 0.7502, + "step": 30900 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005456921052631579, + "loss": 0.7667, + "step": 30910 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005456131578947368, + "loss": 0.7648, + "step": 30920 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005455342105263158, + "loss": 0.7654, + "step": 30930 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005454552631578947, + "loss": 0.7654, + "step": 30940 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005453763157894736, + "loss": 0.7554, + "step": 30950 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005452973684210526, + "loss": 0.7673, + "step": 30960 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005452184210526315, + "loss": 0.7704, + "step": 30970 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005451394736842105, + "loss": 0.7606, + "step": 30980 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005450605263157894, + "loss": 0.7612, + "step": 30990 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005449815789473684, + "loss": 0.7536, + "step": 31000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005449026315789473, + "loss": 0.7804, + "step": 31010 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005448236842105263, + "loss": 0.7685, + "step": 31020 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005447447368421052, + "loss": 0.7722, + "step": 31030 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005446657894736842, + "loss": 0.7534, + "step": 31040 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005445868421052631, + "loss": 0.7505, + "step": 31050 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005445078947368421, + "loss": 0.7667, + "step": 31060 + }, + { + "epoch": 0.31, + "learning_rate": 0.000544428947368421, + "loss": 0.7583, + "step": 31070 + }, + { + "epoch": 0.31, + "learning_rate": 0.00054435, + "loss": 0.7462, + "step": 31080 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005442710526315789, + "loss": 0.7504, + "step": 31090 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005441921052631579, + "loss": 0.7445, + "step": 31100 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005441131578947368, + "loss": 0.757, + "step": 31110 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005440342105263158, + "loss": 0.7521, + "step": 31120 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005439552631578947, + "loss": 0.7544, + "step": 31130 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005438763157894737, + "loss": 0.776, + "step": 31140 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005437973684210526, + "loss": 0.7558, + "step": 31150 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005437184210526315, + "loss": 0.7536, + "step": 31160 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005436394736842105, + "loss": 0.7672, + "step": 31170 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005435605263157895, + "loss": 0.7524, + "step": 31180 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005434815789473684, + "loss": 0.7593, + "step": 31190 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005434026315789474, + "loss": 0.7449, + "step": 31200 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005433236842105263, + "loss": 0.7637, + "step": 31210 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005432447368421053, + "loss": 0.749, + "step": 31220 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005431657894736841, + "loss": 0.7534, + "step": 31230 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005430868421052631, + "loss": 0.7537, + "step": 31240 + }, + { + "epoch": 0.31, + "learning_rate": 0.000543007894736842, + "loss": 0.7719, + "step": 31250 + }, + { + "epoch": 0.31, + "learning_rate": 0.000542928947368421, + "loss": 0.7491, + "step": 31260 + }, + { + "epoch": 0.31, + "learning_rate": 0.00054285, + "loss": 0.7525, + "step": 31270 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005427710526315789, + "loss": 0.7667, + "step": 31280 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005426921052631579, + "loss": 0.7592, + "step": 31290 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005426131578947367, + "loss": 0.7334, + "step": 31300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005425342105263158, + "loss": 0.7551, + "step": 31310 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005424552631578946, + "loss": 0.7591, + "step": 31320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005423763157894737, + "loss": 0.7398, + "step": 31330 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005422973684210525, + "loss": 0.7528, + "step": 31340 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005422184210526316, + "loss": 0.7515, + "step": 31350 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005421394736842104, + "loss": 0.7617, + "step": 31360 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005420605263157894, + "loss": 0.7424, + "step": 31370 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005419815789473684, + "loss": 0.7467, + "step": 31380 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005419026315789473, + "loss": 0.752, + "step": 31390 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005418236842105263, + "loss": 0.7533, + "step": 31400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005417447368421052, + "loss": 0.7586, + "step": 31410 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005416657894736842, + "loss": 0.7534, + "step": 31420 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005415868421052631, + "loss": 0.7456, + "step": 31430 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005415078947368421, + "loss": 0.7484, + "step": 31440 + }, + { + "epoch": 0.31, + "learning_rate": 0.000541428947368421, + "loss": 0.7525, + "step": 31450 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005413499999999999, + "loss": 0.75, + "step": 31460 + }, + { + "epoch": 0.31, + "learning_rate": 0.000541271052631579, + "loss": 0.7527, + "step": 31470 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005411921052631578, + "loss": 0.7389, + "step": 31480 + }, + { + "epoch": 0.31, + "learning_rate": 0.0005411131578947369, + "loss": 0.7251, + "step": 31490 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005410342105263157, + "loss": 0.7385, + "step": 31500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005409552631578948, + "loss": 0.7557, + "step": 31510 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005408763157894736, + "loss": 0.7681, + "step": 31520 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005407973684210525, + "loss": 0.752, + "step": 31530 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005407184210526315, + "loss": 0.7445, + "step": 31540 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005406394736842104, + "loss": 0.7544, + "step": 31550 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005405605263157895, + "loss": 0.7519, + "step": 31560 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005404815789473683, + "loss": 0.7554, + "step": 31570 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005404026315789474, + "loss": 0.7552, + "step": 31580 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005403236842105262, + "loss": 0.7531, + "step": 31590 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005402447368421052, + "loss": 0.7452, + "step": 31600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005401657894736841, + "loss": 0.7409, + "step": 31610 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005400868421052631, + "loss": 0.7496, + "step": 31620 + }, + { + "epoch": 0.32, + "learning_rate": 0.000540007894736842, + "loss": 0.7526, + "step": 31630 + }, + { + "epoch": 0.32, + "learning_rate": 0.000539928947368421, + "loss": 0.7584, + "step": 31640 + }, + { + "epoch": 0.32, + "learning_rate": 0.00053985, + "loss": 0.7541, + "step": 31650 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005397710526315789, + "loss": 0.7505, + "step": 31660 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005396921052631578, + "loss": 0.7549, + "step": 31670 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005396131578947368, + "loss": 0.7518, + "step": 31680 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005395342105263157, + "loss": 0.7668, + "step": 31690 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005394552631578947, + "loss": 0.7565, + "step": 31700 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005393763157894736, + "loss": 0.7698, + "step": 31710 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005392973684210526, + "loss": 0.7571, + "step": 31720 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005392184210526315, + "loss": 0.7514, + "step": 31730 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005391394736842105, + "loss": 0.7492, + "step": 31740 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005390605263157894, + "loss": 0.7502, + "step": 31750 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005389894736842105, + "loss": 0.7524, + "step": 31760 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005389105263157894, + "loss": 0.7314, + "step": 31770 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005388315789473684, + "loss": 0.7403, + "step": 31780 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005387526315789473, + "loss": 0.7702, + "step": 31790 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005386736842105263, + "loss": 0.7652, + "step": 31800 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005385947368421052, + "loss": 0.7406, + "step": 31810 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005385157894736842, + "loss": 0.7541, + "step": 31820 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005384368421052631, + "loss": 0.7522, + "step": 31830 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005383578947368421, + "loss": 0.7502, + "step": 31840 + }, + { + "epoch": 0.32, + "learning_rate": 0.000538278947368421, + "loss": 0.7463, + "step": 31850 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005382, + "loss": 0.7564, + "step": 31860 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005381210526315789, + "loss": 0.7583, + "step": 31870 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005380421052631579, + "loss": 0.7582, + "step": 31880 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005379631578947368, + "loss": 0.7337, + "step": 31890 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005378842105263158, + "loss": 0.7439, + "step": 31900 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005378052631578947, + "loss": 0.7558, + "step": 31910 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005377263157894737, + "loss": 0.7434, + "step": 31920 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005376473684210526, + "loss": 0.7456, + "step": 31930 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005375684210526315, + "loss": 0.7559, + "step": 31940 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005374894736842105, + "loss": 0.7459, + "step": 31950 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005374105263157894, + "loss": 0.7421, + "step": 31960 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005373315789473684, + "loss": 0.7134, + "step": 31970 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005372526315789473, + "loss": 0.737, + "step": 31980 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005371736842105263, + "loss": 0.7193, + "step": 31990 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005370947368421052, + "loss": 0.7333, + "step": 32000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005370157894736842, + "loss": 0.7228, + "step": 32010 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005369368421052631, + "loss": 0.7304, + "step": 32020 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005368578947368421, + "loss": 0.7186, + "step": 32030 + }, + { + "epoch": 0.32, + "learning_rate": 0.000536778947368421, + "loss": 0.7269, + "step": 32040 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005367, + "loss": 0.7097, + "step": 32050 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005366210526315789, + "loss": 0.73, + "step": 32060 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005365421052631578, + "loss": 0.7304, + "step": 32070 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005364631578947368, + "loss": 0.7259, + "step": 32080 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005363842105263158, + "loss": 0.7218, + "step": 32090 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005363052631578947, + "loss": 0.7385, + "step": 32100 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005362263157894737, + "loss": 0.7354, + "step": 32110 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005361473684210526, + "loss": 0.7346, + "step": 32120 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005360684210526316, + "loss": 0.73, + "step": 32130 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005359894736842104, + "loss": 0.728, + "step": 32140 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005359105263157895, + "loss": 0.7234, + "step": 32150 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005358315789473683, + "loss": 0.7322, + "step": 32160 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005357526315789474, + "loss": 0.726, + "step": 32170 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005356736842105263, + "loss": 0.7364, + "step": 32180 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005355947368421053, + "loss": 0.7312, + "step": 32190 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005355157894736842, + "loss": 0.7171, + "step": 32200 + }, + { + "epoch": 0.32, + "learning_rate": 0.000535436842105263, + "loss": 0.72, + "step": 32210 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005353578947368421, + "loss": 0.7491, + "step": 32220 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005352789473684209, + "loss": 0.7367, + "step": 32230 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005352, + "loss": 0.742, + "step": 32240 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005351210526315788, + "loss": 0.7544, + "step": 32250 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005350421052631579, + "loss": 0.7528, + "step": 32260 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005349631578947368, + "loss": 0.7569, + "step": 32270 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005348842105263157, + "loss": 0.7343, + "step": 32280 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005348052631578947, + "loss": 0.7507, + "step": 32290 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005347263157894736, + "loss": 0.7515, + "step": 32300 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005346473684210526, + "loss": 0.7291, + "step": 32310 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005345684210526315, + "loss": 0.7468, + "step": 32320 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005344894736842105, + "loss": 0.7384, + "step": 32330 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005344105263157894, + "loss": 0.7359, + "step": 32340 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005343315789473684, + "loss": 0.7395, + "step": 32350 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005342526315789474, + "loss": 0.7398, + "step": 32360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005341736842105262, + "loss": 0.7363, + "step": 32370 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005340947368421053, + "loss": 0.7574, + "step": 32380 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005340157894736841, + "loss": 0.7408, + "step": 32390 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005339368421052632, + "loss": 0.7352, + "step": 32400 + }, + { + "epoch": 0.32, + "learning_rate": 0.000533857894736842, + "loss": 0.7276, + "step": 32410 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005337789473684211, + "loss": 0.7376, + "step": 32420 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005336999999999999, + "loss": 0.7443, + "step": 32430 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005336210526315789, + "loss": 0.7413, + "step": 32440 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005335421052631578, + "loss": 0.7386, + "step": 32450 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005334631578947368, + "loss": 0.732, + "step": 32460 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005333842105263158, + "loss": 0.7392, + "step": 32470 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005333052631578946, + "loss": 0.7499, + "step": 32480 + }, + { + "epoch": 0.32, + "learning_rate": 0.0005332263157894737, + "loss": 0.7445, + "step": 32490 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005331473684210525, + "loss": 0.7359, + "step": 32500 + }, + { + "epoch": 0.33, + "eval_accuracy": 0.8473949658152091, + "eval_loss": 0.73388671875, + "eval_runtime": 97.0409, + "eval_samples_per_second": 824.395, + "eval_steps_per_second": 1.618, + "step": 32500 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005330684210526315, + "loss": 0.7473, + "step": 32510 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005329894736842104, + "loss": 0.7474, + "step": 32520 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005329105263157894, + "loss": 0.7436, + "step": 32530 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005328315789473683, + "loss": 0.7425, + "step": 32540 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005327526315789473, + "loss": 0.7468, + "step": 32550 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005326736842105263, + "loss": 0.7484, + "step": 32560 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005325947368421052, + "loss": 0.7538, + "step": 32570 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005325157894736841, + "loss": 0.7434, + "step": 32580 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005324368421052631, + "loss": 0.7416, + "step": 32590 + }, + { + "epoch": 0.33, + "learning_rate": 0.000532357894736842, + "loss": 0.7532, + "step": 32600 + }, + { + "epoch": 0.33, + "learning_rate": 0.000532278947368421, + "loss": 0.7541, + "step": 32610 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005321999999999999, + "loss": 0.7343, + "step": 32620 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005321210526315789, + "loss": 0.7493, + "step": 32630 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005320421052631578, + "loss": 0.7483, + "step": 32640 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005319631578947368, + "loss": 0.7399, + "step": 32650 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005318842105263157, + "loss": 0.7524, + "step": 32660 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005318052631578947, + "loss": 0.7564, + "step": 32670 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005317263157894736, + "loss": 0.7478, + "step": 32680 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005316473684210526, + "loss": 0.7527, + "step": 32690 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005315684210526315, + "loss": 0.738, + "step": 32700 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005314894736842105, + "loss": 0.7476, + "step": 32710 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005314105263157894, + "loss": 0.7487, + "step": 32720 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005313315789473684, + "loss": 0.7353, + "step": 32730 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005312526315789473, + "loss": 0.7482, + "step": 32740 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005311736842105263, + "loss": 0.751, + "step": 32750 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005311026315789473, + "loss": 0.7522, + "step": 32760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005310236842105263, + "loss": 0.7365, + "step": 32770 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005309447368421052, + "loss": 0.7514, + "step": 32780 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005308657894736842, + "loss": 0.7545, + "step": 32790 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005307868421052631, + "loss": 0.7517, + "step": 32800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005307078947368421, + "loss": 0.746, + "step": 32810 + }, + { + "epoch": 0.33, + "learning_rate": 0.000530628947368421, + "loss": 0.743, + "step": 32820 + }, + { + "epoch": 0.33, + "learning_rate": 0.00053055, + "loss": 0.7451, + "step": 32830 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005304710526315789, + "loss": 0.7388, + "step": 32840 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005303921052631579, + "loss": 0.7551, + "step": 32850 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005303131578947368, + "loss": 0.7417, + "step": 32860 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005302342105263158, + "loss": 0.75, + "step": 32870 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005301552631578947, + "loss": 0.7354, + "step": 32880 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005300763157894737, + "loss": 0.7264, + "step": 32890 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005299973684210526, + "loss": 0.7326, + "step": 32900 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005299184210526316, + "loss": 0.7509, + "step": 32910 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005298394736842105, + "loss": 0.7294, + "step": 32920 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005297605263157894, + "loss": 0.7486, + "step": 32930 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005296815789473684, + "loss": 0.7352, + "step": 32940 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005296026315789473, + "loss": 0.7414, + "step": 32950 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005295236842105263, + "loss": 0.7458, + "step": 32960 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005294447368421052, + "loss": 0.7415, + "step": 32970 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005293657894736841, + "loss": 0.7358, + "step": 32980 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005292868421052631, + "loss": 0.7365, + "step": 32990 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005292078947368421, + "loss": 0.7359, + "step": 33000 + }, + { + "epoch": 0.33, + "learning_rate": 0.000529128947368421, + "loss": 0.7468, + "step": 33010 + }, + { + "epoch": 0.33, + "learning_rate": 0.00052905, + "loss": 0.742, + "step": 33020 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005289710526315789, + "loss": 0.7374, + "step": 33030 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005288921052631579, + "loss": 0.7457, + "step": 33040 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005288131578947367, + "loss": 0.7384, + "step": 33050 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005287342105263158, + "loss": 0.7441, + "step": 33060 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005286552631578946, + "loss": 0.747, + "step": 33070 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005285763157894737, + "loss": 0.7361, + "step": 33080 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005284973684210526, + "loss": 0.7278, + "step": 33090 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005284184210526316, + "loss": 0.7421, + "step": 33100 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005283394736842105, + "loss": 0.7444, + "step": 33110 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005282605263157894, + "loss": 0.7428, + "step": 33120 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005281815789473684, + "loss": 0.7315, + "step": 33130 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005281026315789473, + "loss": 0.7349, + "step": 33140 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005280236842105263, + "loss": 0.7473, + "step": 33150 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005279447368421052, + "loss": 0.7376, + "step": 33160 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005278657894736842, + "loss": 0.7405, + "step": 33170 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005277868421052632, + "loss": 0.7406, + "step": 33180 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005277078947368421, + "loss": 0.7355, + "step": 33190 + }, + { + "epoch": 0.33, + "learning_rate": 0.000527628947368421, + "loss": 0.7402, + "step": 33200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005275499999999999, + "loss": 0.7363, + "step": 33210 + }, + { + "epoch": 0.33, + "learning_rate": 0.000527471052631579, + "loss": 0.7324, + "step": 33220 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005273921052631578, + "loss": 0.7352, + "step": 33230 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005273131578947368, + "loss": 0.7458, + "step": 33240 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005272342105263157, + "loss": 0.743, + "step": 33250 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005271552631578947, + "loss": 0.7454, + "step": 33260 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005270763157894737, + "loss": 0.7403, + "step": 33270 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005269973684210525, + "loss": 0.7582, + "step": 33280 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005269184210526316, + "loss": 0.7467, + "step": 33290 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005268394736842104, + "loss": 0.7244, + "step": 33300 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005267605263157895, + "loss": 0.7226, + "step": 33310 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005266815789473683, + "loss": 0.7257, + "step": 33320 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005266026315789474, + "loss": 0.7287, + "step": 33330 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005265236842105262, + "loss": 0.716, + "step": 33340 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005264447368421052, + "loss": 0.7227, + "step": 33350 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005263657894736842, + "loss": 0.746, + "step": 33360 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005262868421052631, + "loss": 0.7369, + "step": 33370 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005262078947368421, + "loss": 0.7387, + "step": 33380 + }, + { + "epoch": 0.33, + "learning_rate": 0.000526128947368421, + "loss": 0.7452, + "step": 33390 + }, + { + "epoch": 0.33, + "learning_rate": 0.00052605, + "loss": 0.7576, + "step": 33400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005259710526315789, + "loss": 0.7297, + "step": 33410 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005258921052631578, + "loss": 0.7438, + "step": 33420 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005258131578947368, + "loss": 0.7385, + "step": 33430 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005257342105263157, + "loss": 0.737, + "step": 33440 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005256552631578948, + "loss": 0.7463, + "step": 33450 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005255763157894736, + "loss": 0.7375, + "step": 33460 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005254973684210527, + "loss": 0.7349, + "step": 33470 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005254184210526315, + "loss": 0.7489, + "step": 33480 + }, + { + "epoch": 0.33, + "learning_rate": 0.0005253394736842104, + "loss": 0.717, + "step": 33490 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005252605263157894, + "loss": 0.7439, + "step": 33500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005251815789473683, + "loss": 0.7317, + "step": 33510 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005251026315789473, + "loss": 0.7183, + "step": 33520 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005250236842105262, + "loss": 0.7291, + "step": 33530 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005249447368421052, + "loss": 0.7308, + "step": 33540 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005248657894736841, + "loss": 0.732, + "step": 33550 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005247868421052632, + "loss": 0.7272, + "step": 33560 + }, + { + "epoch": 0.34, + "learning_rate": 0.000524707894736842, + "loss": 0.714, + "step": 33570 + }, + { + "epoch": 0.34, + "learning_rate": 0.000524628947368421, + "loss": 0.7142, + "step": 33580 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005245499999999999, + "loss": 0.7135, + "step": 33590 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005244710526315789, + "loss": 0.7322, + "step": 33600 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005243921052631578, + "loss": 0.7229, + "step": 33610 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005243131578947368, + "loss": 0.7302, + "step": 33620 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005242342105263157, + "loss": 0.7106, + "step": 33630 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005241552631578947, + "loss": 0.7233, + "step": 33640 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005240763157894736, + "loss": 0.7127, + "step": 33650 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005239973684210526, + "loss": 0.7349, + "step": 33660 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005239184210526315, + "loss": 0.7202, + "step": 33670 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005238394736842105, + "loss": 0.7272, + "step": 33680 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005237605263157894, + "loss": 0.7234, + "step": 33690 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005236815789473684, + "loss": 0.7197, + "step": 33700 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005236026315789473, + "loss": 0.7289, + "step": 33710 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005235236842105263, + "loss": 0.7314, + "step": 33720 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005234447368421052, + "loss": 0.7327, + "step": 33730 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005233657894736842, + "loss": 0.7303, + "step": 33740 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005232868421052631, + "loss": 0.7394, + "step": 33750 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005232157894736842, + "loss": 0.738, + "step": 33760 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005231368421052631, + "loss": 0.7384, + "step": 33770 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005230578947368421, + "loss": 0.7299, + "step": 33780 + }, + { + "epoch": 0.34, + "learning_rate": 0.000522978947368421, + "loss": 0.7335, + "step": 33790 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005229, + "loss": 0.7289, + "step": 33800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005228210526315789, + "loss": 0.7436, + "step": 33810 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005227421052631579, + "loss": 0.7481, + "step": 33820 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005226631578947368, + "loss": 0.7415, + "step": 33830 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005225842105263158, + "loss": 0.7187, + "step": 33840 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005225052631578947, + "loss": 0.7431, + "step": 33850 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005224263157894737, + "loss": 0.7364, + "step": 33860 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005223473684210526, + "loss": 0.7437, + "step": 33870 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005222684210526316, + "loss": 0.7448, + "step": 33880 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005221894736842105, + "loss": 0.7319, + "step": 33890 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005221105263157895, + "loss": 0.7297, + "step": 33900 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005220315789473684, + "loss": 0.7319, + "step": 33910 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005219526315789474, + "loss": 0.7375, + "step": 33920 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005218736842105263, + "loss": 0.7316, + "step": 33930 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005217947368421052, + "loss": 0.7477, + "step": 33940 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005217157894736842, + "loss": 0.7337, + "step": 33950 + }, + { + "epoch": 0.34, + "learning_rate": 0.000521636842105263, + "loss": 0.7414, + "step": 33960 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005215578947368421, + "loss": 0.7522, + "step": 33970 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005214789473684209, + "loss": 0.7463, + "step": 33980 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005214, + "loss": 0.7378, + "step": 33990 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005213210526315789, + "loss": 0.7408, + "step": 34000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005212421052631579, + "loss": 0.7547, + "step": 34010 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005211631578947368, + "loss": 0.742, + "step": 34020 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005210842105263157, + "loss": 0.7354, + "step": 34030 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005210052631578947, + "loss": 0.7217, + "step": 34040 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005209263157894736, + "loss": 0.7308, + "step": 34050 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005208473684210526, + "loss": 0.7299, + "step": 34060 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005207684210526315, + "loss": 0.7335, + "step": 34070 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005206894736842105, + "loss": 0.7334, + "step": 34080 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005206105263157895, + "loss": 0.7333, + "step": 34090 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005205315789473684, + "loss": 0.7233, + "step": 34100 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005204526315789474, + "loss": 0.7271, + "step": 34110 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005203736842105262, + "loss": 0.7459, + "step": 34120 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005202947368421053, + "loss": 0.7334, + "step": 34130 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005202157894736841, + "loss": 0.7251, + "step": 34140 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005201368421052632, + "loss": 0.7363, + "step": 34150 + }, + { + "epoch": 0.34, + "learning_rate": 0.000520057894736842, + "loss": 0.7462, + "step": 34160 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005199789473684211, + "loss": 0.7539, + "step": 34170 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005199, + "loss": 0.7338, + "step": 34180 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005198210526315788, + "loss": 0.7268, + "step": 34190 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005197421052631579, + "loss": 0.7311, + "step": 34200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005196631578947367, + "loss": 0.7339, + "step": 34210 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005195842105263158, + "loss": 0.7382, + "step": 34220 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005195052631578946, + "loss": 0.7358, + "step": 34230 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005194263157894737, + "loss": 0.7483, + "step": 34240 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005193473684210525, + "loss": 0.7288, + "step": 34250 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005192684210526315, + "loss": 0.7244, + "step": 34260 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005191894736842105, + "loss": 0.7238, + "step": 34270 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005191105263157894, + "loss": 0.7276, + "step": 34280 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005190315789473684, + "loss": 0.7254, + "step": 34290 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005189526315789473, + "loss": 0.7365, + "step": 34300 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005188736842105263, + "loss": 0.7291, + "step": 34310 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005187947368421052, + "loss": 0.7361, + "step": 34320 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005187157894736841, + "loss": 0.7284, + "step": 34330 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005186368421052631, + "loss": 0.735, + "step": 34340 + }, + { + "epoch": 0.34, + "learning_rate": 0.000518557894736842, + "loss": 0.7269, + "step": 34350 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005184789473684211, + "loss": 0.7336, + "step": 34360 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005183999999999999, + "loss": 0.7274, + "step": 34370 + }, + { + "epoch": 0.34, + "learning_rate": 0.000518321052631579, + "loss": 0.7402, + "step": 34380 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005182421052631578, + "loss": 0.7573, + "step": 34390 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005181631578947368, + "loss": 0.7416, + "step": 34400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005180842105263157, + "loss": 0.7312, + "step": 34410 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005180052631578947, + "loss": 0.7386, + "step": 34420 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005179263157894736, + "loss": 0.7324, + "step": 34430 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005178473684210526, + "loss": 0.7301, + "step": 34440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005177684210526316, + "loss": 0.7395, + "step": 34450 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005176894736842105, + "loss": 0.7325, + "step": 34460 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005176105263157895, + "loss": 0.7335, + "step": 34470 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005175315789473683, + "loss": 0.7278, + "step": 34480 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005174526315789473, + "loss": 0.7283, + "step": 34490 + }, + { + "epoch": 0.34, + "learning_rate": 0.0005173736842105262, + "loss": 0.7152, + "step": 34500 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005172947368421052, + "loss": 0.7211, + "step": 34510 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005172157894736841, + "loss": 0.7264, + "step": 34520 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005171368421052631, + "loss": 0.7357, + "step": 34530 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005170578947368421, + "loss": 0.724, + "step": 34540 + }, + { + "epoch": 0.35, + "learning_rate": 0.000516978947368421, + "loss": 0.7043, + "step": 34550 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005168999999999999, + "loss": 0.7032, + "step": 34560 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005168210526315789, + "loss": 0.7127, + "step": 34570 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005167421052631578, + "loss": 0.7052, + "step": 34580 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005166631578947368, + "loss": 0.7144, + "step": 34590 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005165842105263157, + "loss": 0.7111, + "step": 34600 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005165052631578947, + "loss": 0.7303, + "step": 34610 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005164263157894736, + "loss": 0.7421, + "step": 34620 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005163473684210526, + "loss": 0.7404, + "step": 34630 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005162684210526315, + "loss": 0.7299, + "step": 34640 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005161894736842105, + "loss": 0.7335, + "step": 34650 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005161105263157894, + "loss": 0.7416, + "step": 34660 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005160315789473684, + "loss": 0.7242, + "step": 34670 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005159526315789473, + "loss": 0.7375, + "step": 34680 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005158736842105263, + "loss": 0.7385, + "step": 34690 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005157947368421052, + "loss": 0.7338, + "step": 34700 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005157157894736842, + "loss": 0.7183, + "step": 34710 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005156368421052631, + "loss": 0.7195, + "step": 34720 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005155578947368421, + "loss": 0.729, + "step": 34730 + }, + { + "epoch": 0.35, + "learning_rate": 0.000515478947368421, + "loss": 0.7371, + "step": 34740 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005154, + "loss": 0.7364, + "step": 34750 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005153210526315789, + "loss": 0.7323, + "step": 34760 + }, + { + "epoch": 0.35, + "learning_rate": 0.00051525, + "loss": 0.7454, + "step": 34770 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005151710526315789, + "loss": 0.7339, + "step": 34780 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005150921052631579, + "loss": 0.7392, + "step": 34790 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005150131578947368, + "loss": 0.7508, + "step": 34800 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005149342105263158, + "loss": 0.7465, + "step": 34810 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005148552631578947, + "loss": 0.7408, + "step": 34820 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005147763157894737, + "loss": 0.7322, + "step": 34830 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005146973684210526, + "loss": 0.7364, + "step": 34840 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005146184210526316, + "loss": 0.7434, + "step": 34850 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005145394736842105, + "loss": 0.7299, + "step": 34860 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005144605263157893, + "loss": 0.7336, + "step": 34870 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005143815789473684, + "loss": 0.748, + "step": 34880 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005143026315789474, + "loss": 0.7367, + "step": 34890 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005142236842105263, + "loss": 0.7221, + "step": 34900 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005141447368421053, + "loss": 0.7281, + "step": 34910 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005140657894736842, + "loss": 0.7425, + "step": 34920 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005139868421052631, + "loss": 0.7206, + "step": 34930 + }, + { + "epoch": 0.35, + "learning_rate": 0.000513907894736842, + "loss": 0.7306, + "step": 34940 + }, + { + "epoch": 0.35, + "learning_rate": 0.000513828947368421, + "loss": 0.7212, + "step": 34950 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005137499999999999, + "loss": 0.7291, + "step": 34960 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005136710526315789, + "loss": 0.7288, + "step": 34970 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005135921052631579, + "loss": 0.7239, + "step": 34980 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005135131578947368, + "loss": 0.713, + "step": 34990 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005134342105263158, + "loss": 0.7263, + "step": 35000 + }, + { + "epoch": 0.35, + "eval_accuracy": 0.8494970666829337, + "eval_loss": 0.72412109375, + "eval_runtime": 96.4579, + "eval_samples_per_second": 829.378, + "eval_steps_per_second": 1.628, + "step": 35000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005133552631578947, + "loss": 0.7198, + "step": 35010 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005132763157894737, + "loss": 0.7359, + "step": 35020 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005132052631578946, + "loss": 0.7295, + "step": 35030 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005131263157894736, + "loss": 0.7355, + "step": 35040 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005130473684210525, + "loss": 0.7266, + "step": 35050 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005129684210526316, + "loss": 0.7277, + "step": 35060 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005128894736842104, + "loss": 0.7344, + "step": 35070 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005128105263157895, + "loss": 0.716, + "step": 35080 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005127315789473683, + "loss": 0.7256, + "step": 35090 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005126526315789473, + "loss": 0.7139, + "step": 35100 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005125736842105262, + "loss": 0.7295, + "step": 35110 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005124947368421052, + "loss": 0.7233, + "step": 35120 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005124157894736841, + "loss": 0.7247, + "step": 35130 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005123368421052631, + "loss": 0.7187, + "step": 35140 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005122578947368421, + "loss": 0.73, + "step": 35150 + }, + { + "epoch": 0.35, + "learning_rate": 0.000512178947368421, + "loss": 0.7323, + "step": 35160 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005120999999999999, + "loss": 0.725, + "step": 35170 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005120210526315789, + "loss": 0.7245, + "step": 35180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005119421052631578, + "loss": 0.7183, + "step": 35190 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005118631578947368, + "loss": 0.7255, + "step": 35200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005117842105263157, + "loss": 0.7156, + "step": 35210 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005117052631578947, + "loss": 0.7228, + "step": 35220 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005116263157894736, + "loss": 0.7315, + "step": 35230 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005115473684210526, + "loss": 0.7284, + "step": 35240 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005114684210526315, + "loss": 0.7303, + "step": 35250 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005113894736842105, + "loss": 0.7175, + "step": 35260 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005113105263157894, + "loss": 0.7213, + "step": 35270 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005112315789473684, + "loss": 0.7116, + "step": 35280 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005111526315789473, + "loss": 0.7201, + "step": 35290 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005110736842105263, + "loss": 0.7214, + "step": 35300 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005109947368421052, + "loss": 0.7244, + "step": 35310 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005109157894736841, + "loss": 0.7074, + "step": 35320 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005108368421052631, + "loss": 0.7161, + "step": 35330 + }, + { + "epoch": 0.35, + "learning_rate": 0.000510757894736842, + "loss": 0.7144, + "step": 35340 + }, + { + "epoch": 0.35, + "learning_rate": 0.000510678947368421, + "loss": 0.7091, + "step": 35350 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005105999999999999, + "loss": 0.7014, + "step": 35360 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005105210526315789, + "loss": 0.7294, + "step": 35370 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005104421052631578, + "loss": 0.7478, + "step": 35380 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005103631578947368, + "loss": 0.7266, + "step": 35390 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005102842105263157, + "loss": 0.7205, + "step": 35400 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005102052631578947, + "loss": 0.7177, + "step": 35410 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005101263157894736, + "loss": 0.7212, + "step": 35420 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005100473684210526, + "loss": 0.7242, + "step": 35430 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005099684210526315, + "loss": 0.723, + "step": 35440 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005098894736842105, + "loss": 0.7216, + "step": 35450 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005098105263157894, + "loss": 0.7143, + "step": 35460 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005097315789473684, + "loss": 0.7294, + "step": 35470 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005096526315789473, + "loss": 0.7084, + "step": 35480 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005095736842105263, + "loss": 0.7262, + "step": 35490 + }, + { + "epoch": 0.35, + "learning_rate": 0.0005094947368421052, + "loss": 0.7139, + "step": 35500 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005094157894736842, + "loss": 0.7152, + "step": 35510 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005093368421052631, + "loss": 0.7184, + "step": 35520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005092578947368421, + "loss": 0.7286, + "step": 35530 + }, + { + "epoch": 0.36, + "learning_rate": 0.000509178947368421, + "loss": 0.7231, + "step": 35540 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005091, + "loss": 0.7326, + "step": 35550 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005090210526315789, + "loss": 0.7314, + "step": 35560 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005089421052631579, + "loss": 0.7175, + "step": 35570 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005088631578947368, + "loss": 0.7295, + "step": 35580 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005087842105263158, + "loss": 0.7197, + "step": 35590 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005087052631578947, + "loss": 0.7279, + "step": 35600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005086263157894737, + "loss": 0.7315, + "step": 35610 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005085473684210526, + "loss": 0.7144, + "step": 35620 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005084684210526315, + "loss": 0.7167, + "step": 35630 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005083894736842105, + "loss": 0.7193, + "step": 35640 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005083105263157894, + "loss": 0.728, + "step": 35650 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005082315789473684, + "loss": 0.7328, + "step": 35660 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005081526315789473, + "loss": 0.7275, + "step": 35670 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005080736842105263, + "loss": 0.7131, + "step": 35680 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005079947368421052, + "loss": 0.7281, + "step": 35690 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005079157894736842, + "loss": 0.739, + "step": 35700 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005078368421052631, + "loss": 0.7187, + "step": 35710 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005077578947368421, + "loss": 0.7069, + "step": 35720 + }, + { + "epoch": 0.36, + "learning_rate": 0.000507678947368421, + "loss": 0.709, + "step": 35730 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005076, + "loss": 0.7044, + "step": 35740 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005075210526315789, + "loss": 0.7058, + "step": 35750 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005074421052631579, + "loss": 0.7104, + "step": 35760 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005073631578947368, + "loss": 0.7264, + "step": 35770 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005072842105263157, + "loss": 0.7214, + "step": 35780 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005072052631578947, + "loss": 0.7252, + "step": 35790 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005071263157894737, + "loss": 0.7139, + "step": 35800 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005070473684210526, + "loss": 0.7245, + "step": 35810 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005069684210526316, + "loss": 0.7257, + "step": 35820 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005068894736842105, + "loss": 0.732, + "step": 35830 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005068105263157895, + "loss": 0.7318, + "step": 35840 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005067315789473683, + "loss": 0.7307, + "step": 35850 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005066526315789474, + "loss": 0.7198, + "step": 35860 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005065736842105262, + "loss": 0.7259, + "step": 35870 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005064947368421053, + "loss": 0.7207, + "step": 35880 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005064157894736842, + "loss": 0.7214, + "step": 35890 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005063368421052632, + "loss": 0.716, + "step": 35900 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005062578947368421, + "loss": 0.7169, + "step": 35910 + }, + { + "epoch": 0.36, + "learning_rate": 0.000506178947368421, + "loss": 0.7143, + "step": 35920 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005061, + "loss": 0.7174, + "step": 35930 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005060210526315788, + "loss": 0.7246, + "step": 35940 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005059421052631579, + "loss": 0.7279, + "step": 35950 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005058631578947367, + "loss": 0.7095, + "step": 35960 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005057842105263158, + "loss": 0.7167, + "step": 35970 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005057052631578947, + "loss": 0.7285, + "step": 35980 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005056263157894737, + "loss": 0.7328, + "step": 35990 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005055473684210526, + "loss": 0.7097, + "step": 36000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005054684210526315, + "loss": 0.7217, + "step": 36010 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005053894736842105, + "loss": 0.7156, + "step": 36020 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005053105263157894, + "loss": 0.7147, + "step": 36030 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005052315789473684, + "loss": 0.7142, + "step": 36040 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005051526315789473, + "loss": 0.7222, + "step": 36050 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005050736842105263, + "loss": 0.7305, + "step": 36060 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005049947368421053, + "loss": 0.7179, + "step": 36070 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005049157894736841, + "loss": 0.7175, + "step": 36080 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005048368421052632, + "loss": 0.7293, + "step": 36090 + }, + { + "epoch": 0.36, + "learning_rate": 0.000504757894736842, + "loss": 0.718, + "step": 36100 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005046789473684211, + "loss": 0.7118, + "step": 36110 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005045999999999999, + "loss": 0.7249, + "step": 36120 + }, + { + "epoch": 0.36, + "learning_rate": 0.000504521052631579, + "loss": 0.718, + "step": 36130 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005044421052631578, + "loss": 0.7196, + "step": 36140 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005043631578947368, + "loss": 0.722, + "step": 36150 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005042842105263157, + "loss": 0.7193, + "step": 36160 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005042052631578946, + "loss": 0.7274, + "step": 36170 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005041263157894737, + "loss": 0.7426, + "step": 36180 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005040473684210525, + "loss": 0.7385, + "step": 36190 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005039684210526316, + "loss": 0.7281, + "step": 36200 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005038894736842104, + "loss": 0.7254, + "step": 36210 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005038105263157894, + "loss": 0.7201, + "step": 36220 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005037315789473683, + "loss": 0.7271, + "step": 36230 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005036526315789473, + "loss": 0.712, + "step": 36240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005035736842105262, + "loss": 0.7131, + "step": 36250 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005034947368421052, + "loss": 0.7257, + "step": 36260 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005034157894736842, + "loss": 0.7171, + "step": 36270 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005033368421052631, + "loss": 0.7261, + "step": 36280 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005032578947368421, + "loss": 0.7113, + "step": 36290 + }, + { + "epoch": 0.36, + "learning_rate": 0.000503178947368421, + "loss": 0.7236, + "step": 36300 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005030999999999999, + "loss": 0.7233, + "step": 36310 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005030210526315789, + "loss": 0.7248, + "step": 36320 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005029421052631578, + "loss": 0.7149, + "step": 36330 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005028631578947368, + "loss": 0.7202, + "step": 36340 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005027842105263157, + "loss": 0.7303, + "step": 36350 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005027052631578948, + "loss": 0.7318, + "step": 36360 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005026263157894736, + "loss": 0.7295, + "step": 36370 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005025473684210526, + "loss": 0.7153, + "step": 36380 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005024684210526315, + "loss": 0.7242, + "step": 36390 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005023894736842105, + "loss": 0.7082, + "step": 36400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005023105263157894, + "loss": 0.7298, + "step": 36410 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005022315789473684, + "loss": 0.7359, + "step": 36420 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005021526315789473, + "loss": 0.7232, + "step": 36430 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005020736842105263, + "loss": 0.7138, + "step": 36440 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005019947368421052, + "loss": 0.7274, + "step": 36450 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005019157894736841, + "loss": 0.7177, + "step": 36460 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005018368421052631, + "loss": 0.7288, + "step": 36470 + }, + { + "epoch": 0.36, + "learning_rate": 0.000501757894736842, + "loss": 0.7133, + "step": 36480 + }, + { + "epoch": 0.36, + "learning_rate": 0.000501678947368421, + "loss": 0.7078, + "step": 36490 + }, + { + "epoch": 0.36, + "learning_rate": 0.0005015999999999999, + "loss": 0.7188, + "step": 36500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005015210526315789, + "loss": 0.7166, + "step": 36510 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005014421052631578, + "loss": 0.7206, + "step": 36520 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005013631578947368, + "loss": 0.7089, + "step": 36530 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005012842105263157, + "loss": 0.7096, + "step": 36540 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005012052631578947, + "loss": 0.7141, + "step": 36550 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005011263157894736, + "loss": 0.7171, + "step": 36560 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005010473684210526, + "loss": 0.7153, + "step": 36570 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005009684210526315, + "loss": 0.7071, + "step": 36580 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005008894736842105, + "loss": 0.7147, + "step": 36590 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005008105263157894, + "loss": 0.7072, + "step": 36600 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005007315789473684, + "loss": 0.7211, + "step": 36610 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005006526315789473, + "loss": 0.7277, + "step": 36620 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005005736842105263, + "loss": 0.721, + "step": 36630 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005004947368421052, + "loss": 0.713, + "step": 36640 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005004157894736842, + "loss": 0.7116, + "step": 36650 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005003368421052631, + "loss": 0.7118, + "step": 36660 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005002578947368421, + "loss": 0.7117, + "step": 36670 + }, + { + "epoch": 0.37, + "learning_rate": 0.000500178947368421, + "loss": 0.7096, + "step": 36680 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005001, + "loss": 0.7168, + "step": 36690 + }, + { + "epoch": 0.37, + "learning_rate": 0.0005000210526315789, + "loss": 0.7151, + "step": 36700 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004999421052631579, + "loss": 0.699, + "step": 36710 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004998631578947368, + "loss": 0.7075, + "step": 36720 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004997842105263158, + "loss": 0.7075, + "step": 36730 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004997052631578947, + "loss": 0.7169, + "step": 36740 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004996263157894736, + "loss": 0.7234, + "step": 36750 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004995473684210526, + "loss": 0.7185, + "step": 36760 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004994684210526315, + "loss": 0.7203, + "step": 36770 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004993894736842105, + "loss": 0.7247, + "step": 36780 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004993105263157894, + "loss": 0.7275, + "step": 36790 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004992315789473684, + "loss": 0.7052, + "step": 36800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004991526315789473, + "loss": 0.7142, + "step": 36810 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004990736842105263, + "loss": 0.7164, + "step": 36820 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004989947368421052, + "loss": 0.7231, + "step": 36830 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004989157894736842, + "loss": 0.7199, + "step": 36840 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004988368421052631, + "loss": 0.7085, + "step": 36850 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004987578947368421, + "loss": 0.7109, + "step": 36860 + }, + { + "epoch": 0.37, + "learning_rate": 0.000498678947368421, + "loss": 0.7202, + "step": 36870 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004986, + "loss": 0.7232, + "step": 36880 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004985210526315789, + "loss": 0.7137, + "step": 36890 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004984421052631579, + "loss": 0.721, + "step": 36900 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004983631578947368, + "loss": 0.7191, + "step": 36910 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004982842105263158, + "loss": 0.7214, + "step": 36920 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004982052631578947, + "loss": 0.7221, + "step": 36930 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004981263157894737, + "loss": 0.7255, + "step": 36940 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004980473684210526, + "loss": 0.7214, + "step": 36950 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004979684210526316, + "loss": 0.7181, + "step": 36960 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004978894736842105, + "loss": 0.7225, + "step": 36970 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004978105263157895, + "loss": 0.7284, + "step": 36980 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004977315789473683, + "loss": 0.7201, + "step": 36990 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004976526315789474, + "loss": 0.727, + "step": 37000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004975736842105263, + "loss": 0.7146, + "step": 37010 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004974947368421053, + "loss": 0.7201, + "step": 37020 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004974236842105262, + "loss": 0.7216, + "step": 37030 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004973447368421053, + "loss": 0.7165, + "step": 37040 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004972657894736841, + "loss": 0.7176, + "step": 37050 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004971868421052631, + "loss": 0.7259, + "step": 37060 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004971078947368421, + "loss": 0.7075, + "step": 37070 + }, + { + "epoch": 0.37, + "learning_rate": 0.000497028947368421, + "loss": 0.7061, + "step": 37080 + }, + { + "epoch": 0.37, + "learning_rate": 0.00049695, + "loss": 0.7284, + "step": 37090 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004968710526315789, + "loss": 0.7247, + "step": 37100 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004967921052631579, + "loss": 0.7131, + "step": 37110 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004967131578947368, + "loss": 0.7106, + "step": 37120 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004966342105263158, + "loss": 0.7251, + "step": 37130 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004965552631578947, + "loss": 0.7151, + "step": 37140 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004964763157894736, + "loss": 0.7147, + "step": 37150 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004963973684210525, + "loss": 0.7136, + "step": 37160 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004963184210526315, + "loss": 0.6948, + "step": 37170 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004962394736842106, + "loss": 0.7092, + "step": 37180 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004961605263157894, + "loss": 0.7282, + "step": 37190 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004960815789473684, + "loss": 0.7155, + "step": 37200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004960026315789473, + "loss": 0.7101, + "step": 37210 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004959236842105262, + "loss": 0.7093, + "step": 37220 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004958447368421052, + "loss": 0.695, + "step": 37230 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004957657894736841, + "loss": 0.7034, + "step": 37240 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004956868421052631, + "loss": 0.6962, + "step": 37250 + }, + { + "epoch": 0.37, + "learning_rate": 0.000495607894736842, + "loss": 0.697, + "step": 37260 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004955289473684211, + "loss": 0.6937, + "step": 37270 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004954499999999999, + "loss": 0.7066, + "step": 37280 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004953710526315789, + "loss": 0.6947, + "step": 37290 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004952921052631578, + "loss": 0.6934, + "step": 37300 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004952131578947368, + "loss": 0.7065, + "step": 37310 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004951342105263157, + "loss": 0.6958, + "step": 37320 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004950552631578947, + "loss": 0.6939, + "step": 37330 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004949763157894736, + "loss": 0.7098, + "step": 37340 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004948973684210526, + "loss": 0.6964, + "step": 37350 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004948184210526315, + "loss": 0.7114, + "step": 37360 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004947394736842105, + "loss": 0.699, + "step": 37370 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004946605263157894, + "loss": 0.7035, + "step": 37380 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004945815789473684, + "loss": 0.7026, + "step": 37390 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004945026315789473, + "loss": 0.6915, + "step": 37400 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004944236842105263, + "loss": 0.7018, + "step": 37410 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004943447368421052, + "loss": 0.6961, + "step": 37420 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004942657894736842, + "loss": 0.7012, + "step": 37430 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004941868421052631, + "loss": 0.6963, + "step": 37440 + }, + { + "epoch": 0.37, + "learning_rate": 0.000494107894736842, + "loss": 0.6939, + "step": 37450 + }, + { + "epoch": 0.37, + "learning_rate": 0.000494028947368421, + "loss": 0.7096, + "step": 37460 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004939499999999999, + "loss": 0.7065, + "step": 37470 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004938710526315789, + "loss": 0.7188, + "step": 37480 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004937921052631578, + "loss": 0.7202, + "step": 37490 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004937131578947368, + "loss": 0.719, + "step": 37500 + }, + { + "epoch": 0.38, + "eval_accuracy": 0.8522274619295023, + "eval_loss": 0.70361328125, + "eval_runtime": 97.0969, + "eval_samples_per_second": 823.919, + "eval_steps_per_second": 1.617, + "step": 37500 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004936342105263157, + "loss": 0.7156, + "step": 37510 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004935552631578947, + "loss": 0.7196, + "step": 37520 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004934763157894736, + "loss": 0.715, + "step": 37530 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004933973684210526, + "loss": 0.7078, + "step": 37540 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004933184210526315, + "loss": 0.7027, + "step": 37550 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004932394736842105, + "loss": 0.7141, + "step": 37560 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004931605263157894, + "loss": 0.7137, + "step": 37570 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004930815789473684, + "loss": 0.7156, + "step": 37580 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004930026315789473, + "loss": 0.7046, + "step": 37590 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004929236842105263, + "loss": 0.7017, + "step": 37600 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004928447368421052, + "loss": 0.7096, + "step": 37610 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004927657894736842, + "loss": 0.7229, + "step": 37620 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004926868421052631, + "loss": 0.7186, + "step": 37630 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004926078947368421, + "loss": 0.7103, + "step": 37640 + }, + { + "epoch": 0.38, + "learning_rate": 0.000492528947368421, + "loss": 0.7069, + "step": 37650 + }, + { + "epoch": 0.38, + "learning_rate": 0.00049245, + "loss": 0.7328, + "step": 37660 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004923710526315789, + "loss": 0.7067, + "step": 37670 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004922921052631579, + "loss": 0.7214, + "step": 37680 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004922131578947368, + "loss": 0.7246, + "step": 37690 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004921342105263158, + "loss": 0.72, + "step": 37700 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004920552631578947, + "loss": 0.7203, + "step": 37710 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004919763157894737, + "loss": 0.7188, + "step": 37720 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004918973684210526, + "loss": 0.7234, + "step": 37730 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004918184210526315, + "loss": 0.7165, + "step": 37740 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004917394736842105, + "loss": 0.7195, + "step": 37750 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004916605263157894, + "loss": 0.7046, + "step": 37760 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004915815789473684, + "loss": 0.7145, + "step": 37770 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004915026315789473, + "loss": 0.7107, + "step": 37780 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004914236842105263, + "loss": 0.7143, + "step": 37790 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004913447368421052, + "loss": 0.7283, + "step": 37800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004912657894736842, + "loss": 0.7108, + "step": 37810 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004911868421052631, + "loss": 0.699, + "step": 37820 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004911078947368421, + "loss": 0.7078, + "step": 37830 + }, + { + "epoch": 0.38, + "learning_rate": 0.000491028947368421, + "loss": 0.7112, + "step": 37840 + }, + { + "epoch": 0.38, + "learning_rate": 0.00049095, + "loss": 0.713, + "step": 37850 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004908710526315789, + "loss": 0.7074, + "step": 37860 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004907921052631579, + "loss": 0.7112, + "step": 37870 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004907131578947368, + "loss": 0.7033, + "step": 37880 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004906342105263158, + "loss": 0.7322, + "step": 37890 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004905552631578946, + "loss": 0.7089, + "step": 37900 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004904763157894737, + "loss": 0.7081, + "step": 37910 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004903973684210526, + "loss": 0.7105, + "step": 37920 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004903184210526316, + "loss": 0.7295, + "step": 37930 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004902394736842105, + "loss": 0.6959, + "step": 37940 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004901605263157895, + "loss": 0.7188, + "step": 37950 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004900815789473684, + "loss": 0.7233, + "step": 37960 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004900026315789473, + "loss": 0.7068, + "step": 37970 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004899236842105263, + "loss": 0.7028, + "step": 37980 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004898447368421051, + "loss": 0.7088, + "step": 37990 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004897657894736842, + "loss": 0.7146, + "step": 38000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004896868421052632, + "loss": 0.7026, + "step": 38010 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004896078947368421, + "loss": 0.6985, + "step": 38020 + }, + { + "epoch": 0.38, + "learning_rate": 0.000489528947368421, + "loss": 0.7207, + "step": 38030 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004894578947368421, + "loss": 0.7109, + "step": 38040 + }, + { + "epoch": 0.38, + "learning_rate": 0.000489378947368421, + "loss": 0.7072, + "step": 38050 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004892999999999999, + "loss": 0.7085, + "step": 38060 + }, + { + "epoch": 0.38, + "learning_rate": 0.000489221052631579, + "loss": 0.6987, + "step": 38070 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004891421052631578, + "loss": 0.6996, + "step": 38080 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004890631578947369, + "loss": 0.6952, + "step": 38090 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004889842105263157, + "loss": 0.7141, + "step": 38100 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004889052631578948, + "loss": 0.6997, + "step": 38110 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004888263157894736, + "loss": 0.6953, + "step": 38120 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004887473684210526, + "loss": 0.7028, + "step": 38130 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004886684210526315, + "loss": 0.7287, + "step": 38140 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004885894736842104, + "loss": 0.7033, + "step": 38150 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004885105263157895, + "loss": 0.6955, + "step": 38160 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004884315789473683, + "loss": 0.7003, + "step": 38170 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004883526315789474, + "loss": 0.6893, + "step": 38180 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048827368421052624, + "loss": 0.7037, + "step": 38190 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048819473684210524, + "loss": 0.723, + "step": 38200 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048811578947368414, + "loss": 0.6862, + "step": 38210 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048803684210526314, + "loss": 0.7082, + "step": 38220 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048795789473684203, + "loss": 0.7009, + "step": 38230 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048787894736842104, + "loss": 0.7058, + "step": 38240 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048779999999999993, + "loss": 0.7091, + "step": 38250 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004877210526315789, + "loss": 0.7238, + "step": 38260 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004876421052631579, + "loss": 0.696, + "step": 38270 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004875631578947368, + "loss": 0.7026, + "step": 38280 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004874842105263158, + "loss": 0.7143, + "step": 38290 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048740526315789467, + "loss": 0.7052, + "step": 38300 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004873263157894737, + "loss": 0.7143, + "step": 38310 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048724736842105257, + "loss": 0.7069, + "step": 38320 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004871684210526315, + "loss": 0.71, + "step": 38330 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048708947368421047, + "loss": 0.695, + "step": 38340 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004870105263157894, + "loss": 0.7147, + "step": 38350 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004869315789473684, + "loss": 0.6984, + "step": 38360 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004868526315789473, + "loss": 0.6981, + "step": 38370 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004867736842105263, + "loss": 0.708, + "step": 38380 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004866947368421052, + "loss": 0.6915, + "step": 38390 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048661578947368416, + "loss": 0.6967, + "step": 38400 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004865368421052631, + "loss": 0.7076, + "step": 38410 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048645789473684205, + "loss": 0.7114, + "step": 38420 + }, + { + "epoch": 0.38, + "learning_rate": 0.000486378947368421, + "loss": 0.7053, + "step": 38430 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048629999999999995, + "loss": 0.6946, + "step": 38440 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048622105263157895, + "loss": 0.6925, + "step": 38450 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048614210526315785, + "loss": 0.6931, + "step": 38460 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048606315789473685, + "loss": 0.7136, + "step": 38470 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048598421052631574, + "loss": 0.7081, + "step": 38480 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004859052631578947, + "loss": 0.6948, + "step": 38490 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048582631578947364, + "loss": 0.6915, + "step": 38500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004857473684210526, + "loss": 0.6917, + "step": 38510 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004856684210526315, + "loss": 0.6973, + "step": 38520 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004855894736842105, + "loss": 0.7004, + "step": 38530 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004855105263157895, + "loss": 0.6954, + "step": 38540 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004854315789473684, + "loss": 0.6859, + "step": 38550 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048535263157894733, + "loss": 0.6932, + "step": 38560 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004852736842105263, + "loss": 0.6987, + "step": 38570 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004851947368421052, + "loss": 0.7112, + "step": 38580 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004851157894736842, + "loss": 0.6718, + "step": 38590 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004850368421052631, + "loss": 0.7012, + "step": 38600 + }, + { + "epoch": 0.39, + "learning_rate": 0.000484957894736842, + "loss": 0.7003, + "step": 38610 + }, + { + "epoch": 0.39, + "learning_rate": 0.000484878947368421, + "loss": 0.6851, + "step": 38620 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048479999999999997, + "loss": 0.6972, + "step": 38630 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004847210526315789, + "loss": 0.7078, + "step": 38640 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048464210526315786, + "loss": 0.7038, + "step": 38650 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004845631578947368, + "loss": 0.6966, + "step": 38660 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048448421052631576, + "loss": 0.7014, + "step": 38670 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048440526315789465, + "loss": 0.7019, + "step": 38680 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048432631578947366, + "loss": 0.7057, + "step": 38690 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048424736842105255, + "loss": 0.6955, + "step": 38700 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048416842105263155, + "loss": 0.6929, + "step": 38710 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004840894736842105, + "loss": 0.696, + "step": 38720 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048401052631578945, + "loss": 0.6963, + "step": 38730 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004839315789473684, + "loss": 0.6944, + "step": 38740 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004838526315789473, + "loss": 0.7043, + "step": 38750 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004837736842105263, + "loss": 0.6921, + "step": 38760 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004836947368421052, + "loss": 0.6853, + "step": 38770 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004836157894736842, + "loss": 0.6994, + "step": 38780 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004835368421052631, + "loss": 0.6953, + "step": 38790 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004834578947368421, + "loss": 0.6904, + "step": 38800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048337894736842104, + "loss": 0.6889, + "step": 38810 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048329999999999993, + "loss": 0.6786, + "step": 38820 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048322105263157893, + "loss": 0.6897, + "step": 38830 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004831421052631578, + "loss": 0.6931, + "step": 38840 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048306315789473683, + "loss": 0.6884, + "step": 38850 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004829842105263157, + "loss": 0.6975, + "step": 38860 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004829052631578947, + "loss": 0.7129, + "step": 38870 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004828263157894736, + "loss": 0.7159, + "step": 38880 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048274736842105257, + "loss": 0.6956, + "step": 38890 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004826684210526315, + "loss": 0.6964, + "step": 38900 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048258947368421046, + "loss": 0.7002, + "step": 38910 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048251052631578947, + "loss": 0.6982, + "step": 38920 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048243157894736836, + "loss": 0.7088, + "step": 38930 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048235263157894736, + "loss": 0.7087, + "step": 38940 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048227368421052626, + "loss": 0.7199, + "step": 38950 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048219473684210526, + "loss": 0.7218, + "step": 38960 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048211578947368415, + "loss": 0.6993, + "step": 38970 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004820368421052631, + "loss": 0.6952, + "step": 38980 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048195789473684205, + "loss": 0.6993, + "step": 38990 + }, + { + "epoch": 0.39, + "learning_rate": 0.000481878947368421, + "loss": 0.6955, + "step": 39000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004818, + "loss": 0.6888, + "step": 39010 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004817210526315789, + "loss": 0.6949, + "step": 39020 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004816421052631579, + "loss": 0.689, + "step": 39030 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004815710526315789, + "loss": 0.7001, + "step": 39040 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004814921052631578, + "loss": 0.6921, + "step": 39050 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004814131578947368, + "loss": 0.6863, + "step": 39060 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048133421052631576, + "loss": 0.6886, + "step": 39070 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004812552631578947, + "loss": 0.6878, + "step": 39080 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048117631578947366, + "loss": 0.6889, + "step": 39090 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048109736842105255, + "loss": 0.6952, + "step": 39100 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048101842105263156, + "loss": 0.7014, + "step": 39110 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048093947368421045, + "loss": 0.687, + "step": 39120 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048086052631578945, + "loss": 0.6916, + "step": 39130 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048078157894736835, + "loss": 0.6913, + "step": 39140 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048070263157894735, + "loss": 0.6941, + "step": 39150 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004806236842105263, + "loss": 0.6897, + "step": 39160 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004805447368421052, + "loss": 0.6934, + "step": 39170 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004804657894736842, + "loss": 0.6852, + "step": 39180 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004803868421052631, + "loss": 0.6965, + "step": 39190 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004803078947368421, + "loss": 0.6887, + "step": 39200 + }, + { + "epoch": 0.39, + "learning_rate": 0.000480228947368421, + "loss": 0.6916, + "step": 39210 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048015, + "loss": 0.6985, + "step": 39220 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004800710526315789, + "loss": 0.6974, + "step": 39230 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047999210526315783, + "loss": 0.6817, + "step": 39240 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047991315789473683, + "loss": 0.6742, + "step": 39250 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047983421052631573, + "loss": 0.681, + "step": 39260 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047975526315789473, + "loss": 0.6815, + "step": 39270 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004796763157894736, + "loss": 0.674, + "step": 39280 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004795973684210526, + "loss": 0.6891, + "step": 39290 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004795184210526315, + "loss": 0.6903, + "step": 39300 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047943947368421047, + "loss": 0.6884, + "step": 39310 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004793605263157894, + "loss": 0.7042, + "step": 39320 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047928157894736837, + "loss": 0.6875, + "step": 39330 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004792026315789473, + "loss": 0.6812, + "step": 39340 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047912368421052626, + "loss": 0.6878, + "step": 39350 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047904473684210526, + "loss": 0.6839, + "step": 39360 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047896578947368416, + "loss": 0.7026, + "step": 39370 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047888684210526316, + "loss": 0.6946, + "step": 39380 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047880789473684206, + "loss": 0.7005, + "step": 39390 + }, + { + "epoch": 0.39, + "learning_rate": 0.000478728947368421, + "loss": 0.7001, + "step": 39400 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047864999999999995, + "loss": 0.7012, + "step": 39410 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004785710526315789, + "loss": 0.7053, + "step": 39420 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004784921052631578, + "loss": 0.7048, + "step": 39430 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004784131578947368, + "loss": 0.7034, + "step": 39440 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004783342105263158, + "loss": 0.7107, + "step": 39450 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004782552631578947, + "loss": 0.7069, + "step": 39460 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047817631578947364, + "loss": 0.6973, + "step": 39470 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004780973684210526, + "loss": 0.7031, + "step": 39480 + }, + { + "epoch": 0.39, + "learning_rate": 0.00047801842105263154, + "loss": 0.7056, + "step": 39490 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004779394736842105, + "loss": 0.691, + "step": 39500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047786052631578944, + "loss": 0.6971, + "step": 39510 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047778157894736833, + "loss": 0.699, + "step": 39520 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047770263157894733, + "loss": 0.7114, + "step": 39530 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004776236842105263, + "loss": 0.7069, + "step": 39540 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047754473684210523, + "loss": 0.6815, + "step": 39550 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004774657894736842, + "loss": 0.7017, + "step": 39560 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004773868421052631, + "loss": 0.7047, + "step": 39570 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004773078947368421, + "loss": 0.6874, + "step": 39580 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047722894736842097, + "loss": 0.7093, + "step": 39590 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047714999999999997, + "loss": 0.695, + "step": 39600 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047707105263157886, + "loss": 0.7006, + "step": 39610 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047699210526315787, + "loss": 0.713, + "step": 39620 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004769131578947368, + "loss": 0.7064, + "step": 39630 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047683421052631576, + "loss": 0.7092, + "step": 39640 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004767552631578947, + "loss": 0.708, + "step": 39650 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004766763157894736, + "loss": 0.6989, + "step": 39660 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004765973684210526, + "loss": 0.7091, + "step": 39670 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004765184210526315, + "loss": 0.6891, + "step": 39680 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004764394736842105, + "loss": 0.701, + "step": 39690 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004763605263157894, + "loss": 0.71, + "step": 39700 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004762815789473684, + "loss": 0.6988, + "step": 39710 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047620263157894735, + "loss": 0.6897, + "step": 39720 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047612368421052624, + "loss": 0.6949, + "step": 39730 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047604473684210525, + "loss": 0.6938, + "step": 39740 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047596578947368414, + "loss": 0.6925, + "step": 39750 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047588684210526314, + "loss": 0.705, + "step": 39760 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047580789473684204, + "loss": 0.6964, + "step": 39770 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047572894736842104, + "loss": 0.6825, + "step": 39780 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047564999999999993, + "loss": 0.6854, + "step": 39790 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004755710526315789, + "loss": 0.7001, + "step": 39800 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004754921052631579, + "loss": 0.6817, + "step": 39810 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004754131578947368, + "loss": 0.6928, + "step": 39820 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004753342105263158, + "loss": 0.6887, + "step": 39830 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004752552631578947, + "loss": 0.6972, + "step": 39840 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004751763157894737, + "loss": 0.6946, + "step": 39850 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047509736842105257, + "loss": 0.6966, + "step": 39860 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004750184210526316, + "loss": 0.7063, + "step": 39870 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047493947368421047, + "loss": 0.6945, + "step": 39880 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004748605263157894, + "loss": 0.6974, + "step": 39890 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047478157894736836, + "loss": 0.6954, + "step": 39900 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004747026315789473, + "loss": 0.6986, + "step": 39910 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004746236842105263, + "loss": 0.7061, + "step": 39920 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004745447368421052, + "loss": 0.6927, + "step": 39930 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004744657894736842, + "loss": 0.6977, + "step": 39940 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004743868421052631, + "loss": 0.6994, + "step": 39950 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047430789473684205, + "loss": 0.6969, + "step": 39960 + }, + { + "epoch": 0.4, + "learning_rate": 0.000474228947368421, + "loss": 0.707, + "step": 39970 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047414999999999995, + "loss": 0.6954, + "step": 39980 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004740710526315789, + "loss": 0.7007, + "step": 39990 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047399210526315785, + "loss": 0.6871, + "step": 40000 + }, + { + "epoch": 0.4, + "eval_accuracy": 0.8555457696762774, + "eval_loss": 0.6865234375, + "eval_runtime": 97.6539, + "eval_samples_per_second": 819.22, + "eval_steps_per_second": 1.608, + "step": 40000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047391315789473685, + "loss": 0.6924, + "step": 40010 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047383421052631574, + "loss": 0.6849, + "step": 40020 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004737552631578947, + "loss": 0.6881, + "step": 40030 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047368421052631577, + "loss": 0.6896, + "step": 40040 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047360526315789466, + "loss": 0.6836, + "step": 40050 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047352631578947366, + "loss": 0.6836, + "step": 40060 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004734473684210526, + "loss": 0.6805, + "step": 40070 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004733684210526315, + "loss": 0.6913, + "step": 40080 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004732894736842105, + "loss": 0.6908, + "step": 40090 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004732105263157894, + "loss": 0.6953, + "step": 40100 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004731315789473684, + "loss": 0.6889, + "step": 40110 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004730526315789473, + "loss": 0.6899, + "step": 40120 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004729736842105263, + "loss": 0.7003, + "step": 40130 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004728947368421052, + "loss": 0.6891, + "step": 40140 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047281578947368415, + "loss": 0.6883, + "step": 40150 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047273684210526315, + "loss": 0.6824, + "step": 40160 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047265789473684204, + "loss": 0.6874, + "step": 40170 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047257894736842104, + "loss": 0.6972, + "step": 40180 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047249999999999994, + "loss": 0.6928, + "step": 40190 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047242105263157894, + "loss": 0.6907, + "step": 40200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047234210526315783, + "loss": 0.6792, + "step": 40210 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004722631578947368, + "loss": 0.702, + "step": 40220 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047218421052631573, + "loss": 0.6839, + "step": 40230 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004721052631578947, + "loss": 0.6884, + "step": 40240 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004720263157894737, + "loss": 0.6862, + "step": 40250 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004719473684210526, + "loss": 0.6907, + "step": 40260 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004718684210526316, + "loss": 0.6965, + "step": 40270 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004717894736842105, + "loss": 0.6958, + "step": 40280 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004717105263157895, + "loss": 0.6888, + "step": 40290 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047163157894736837, + "loss": 0.682, + "step": 40300 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004715526315789473, + "loss": 0.6933, + "step": 40310 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047147368421052627, + "loss": 0.6881, + "step": 40320 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004713947368421052, + "loss": 0.6933, + "step": 40330 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004713157894736842, + "loss": 0.6976, + "step": 40340 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004712368421052631, + "loss": 0.6975, + "step": 40350 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004711578947368421, + "loss": 0.6913, + "step": 40360 + }, + { + "epoch": 0.4, + "learning_rate": 0.000471078947368421, + "loss": 0.6992, + "step": 40370 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047099999999999996, + "loss": 0.6916, + "step": 40380 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004709210526315789, + "loss": 0.6828, + "step": 40390 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047084210526315785, + "loss": 0.6956, + "step": 40400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004707631578947368, + "loss": 0.6947, + "step": 40410 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047068421052631575, + "loss": 0.6874, + "step": 40420 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047060526315789464, + "loss": 0.6968, + "step": 40430 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047052631578947365, + "loss": 0.7027, + "step": 40440 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004704473684210526, + "loss": 0.7011, + "step": 40450 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047036842105263154, + "loss": 0.7095, + "step": 40460 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004702894736842105, + "loss": 0.694, + "step": 40470 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047021052631578944, + "loss": 0.6841, + "step": 40480 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004701315789473684, + "loss": 0.6947, + "step": 40490 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004700526315789473, + "loss": 0.6903, + "step": 40500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004699736842105263, + "loss": 0.6838, + "step": 40510 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004698947368421052, + "loss": 0.6863, + "step": 40520 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004698157894736842, + "loss": 0.6855, + "step": 40530 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046973684210526313, + "loss": 0.6874, + "step": 40540 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004696578947368421, + "loss": 0.7044, + "step": 40550 + }, + { + "epoch": 0.41, + "learning_rate": 0.000469578947368421, + "loss": 0.6961, + "step": 40560 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004694999999999999, + "loss": 0.695, + "step": 40570 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004694210526315789, + "loss": 0.6925, + "step": 40580 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004693421052631578, + "loss": 0.6871, + "step": 40590 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004692631578947368, + "loss": 0.6961, + "step": 40600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004691842105263157, + "loss": 0.6917, + "step": 40610 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004691052631578947, + "loss": 0.7046, + "step": 40620 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046902631578947366, + "loss": 0.6964, + "step": 40630 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046894736842105256, + "loss": 0.7025, + "step": 40640 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046886842105263156, + "loss": 0.697, + "step": 40650 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046878947368421045, + "loss": 0.7004, + "step": 40660 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046871052631578946, + "loss": 0.6905, + "step": 40670 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046863157894736835, + "loss": 0.6974, + "step": 40680 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046855263157894735, + "loss": 0.6892, + "step": 40690 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046847368421052625, + "loss": 0.703, + "step": 40700 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004683947368421052, + "loss": 0.6959, + "step": 40710 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004683157894736842, + "loss": 0.7007, + "step": 40720 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004682368421052631, + "loss": 0.7028, + "step": 40730 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004681578947368421, + "loss": 0.6976, + "step": 40740 + }, + { + "epoch": 0.41, + "learning_rate": 0.000468078947368421, + "loss": 0.6759, + "step": 40750 + }, + { + "epoch": 0.41, + "learning_rate": 0.000468, + "loss": 0.687, + "step": 40760 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004679210526315789, + "loss": 0.691, + "step": 40770 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004678421052631579, + "loss": 0.7051, + "step": 40780 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004677631578947368, + "loss": 0.6868, + "step": 40790 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046768421052631573, + "loss": 0.6897, + "step": 40800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046760526315789473, + "loss": 0.6961, + "step": 40810 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046752631578947363, + "loss": 0.6947, + "step": 40820 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046744736842105263, + "loss": 0.6867, + "step": 40830 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004673684210526315, + "loss": 0.7068, + "step": 40840 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004672894736842105, + "loss": 0.6843, + "step": 40850 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004672105263157894, + "loss": 0.6921, + "step": 40860 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046713157894736837, + "loss": 0.6965, + "step": 40870 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004670526315789473, + "loss": 0.6805, + "step": 40880 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046697368421052627, + "loss": 0.6899, + "step": 40890 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046689473684210527, + "loss": 0.7008, + "step": 40900 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046681578947368416, + "loss": 0.6849, + "step": 40910 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046673684210526316, + "loss": 0.6882, + "step": 40920 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046665789473684206, + "loss": 0.6899, + "step": 40930 + }, + { + "epoch": 0.41, + "learning_rate": 0.000466578947368421, + "loss": 0.6999, + "step": 40940 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046649999999999996, + "loss": 0.6869, + "step": 40950 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004664210526315789, + "loss": 0.6805, + "step": 40960 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046634210526315785, + "loss": 0.7013, + "step": 40970 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004662631578947368, + "loss": 0.7048, + "step": 40980 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004661842105263157, + "loss": 0.6954, + "step": 40990 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004661052631578947, + "loss": 0.6995, + "step": 41000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046602631578947364, + "loss": 0.6981, + "step": 41010 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004659473684210526, + "loss": 0.6892, + "step": 41020 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046586842105263154, + "loss": 0.7067, + "step": 41030 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004657973684210526, + "loss": 0.7051, + "step": 41040 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004657184210526315, + "loss": 0.6936, + "step": 41050 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046563947368421046, + "loss": 0.6929, + "step": 41060 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046556052631578946, + "loss": 0.6958, + "step": 41070 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046548157894736836, + "loss": 0.689, + "step": 41080 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046540263157894736, + "loss": 0.687, + "step": 41090 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046532368421052625, + "loss": 0.6948, + "step": 41100 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046524473684210525, + "loss": 0.6971, + "step": 41110 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046516578947368415, + "loss": 0.7012, + "step": 41120 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046508684210526315, + "loss": 0.6793, + "step": 41130 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046500789473684205, + "loss": 0.6915, + "step": 41140 + }, + { + "epoch": 0.41, + "learning_rate": 0.000464928947368421, + "loss": 0.6676, + "step": 41150 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046485, + "loss": 0.6703, + "step": 41160 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004647710526315789, + "loss": 0.6636, + "step": 41170 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004646921052631579, + "loss": 0.6597, + "step": 41180 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004646131578947368, + "loss": 0.6886, + "step": 41190 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004645342105263158, + "loss": 0.6793, + "step": 41200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004644552631578947, + "loss": 0.69, + "step": 41210 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046437631578947363, + "loss": 0.6747, + "step": 41220 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004642973684210526, + "loss": 0.686, + "step": 41230 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046421842105263153, + "loss": 0.6928, + "step": 41240 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046413947368421053, + "loss": 0.7013, + "step": 41250 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004640605263157894, + "loss": 0.695, + "step": 41260 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046398157894736843, + "loss": 0.6906, + "step": 41270 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004639026315789473, + "loss": 0.7013, + "step": 41280 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046382368421052627, + "loss": 0.6922, + "step": 41290 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004637447368421052, + "loss": 0.7055, + "step": 41300 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046366578947368417, + "loss": 0.7107, + "step": 41310 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004635868421052631, + "loss": 0.6923, + "step": 41320 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046350789473684206, + "loss": 0.7092, + "step": 41330 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046342894736842107, + "loss": 0.6952, + "step": 41340 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046334999999999996, + "loss": 0.6947, + "step": 41350 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004632710526315789, + "loss": 0.682, + "step": 41360 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046319210526315786, + "loss": 0.6881, + "step": 41370 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004631131578947368, + "loss": 0.6981, + "step": 41380 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046303421052631575, + "loss": 0.6984, + "step": 41390 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004629552631578947, + "loss": 0.6828, + "step": 41400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004628763157894736, + "loss": 0.6994, + "step": 41410 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004627973684210526, + "loss": 0.7045, + "step": 41420 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046271842105263155, + "loss": 0.6925, + "step": 41430 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004626394736842105, + "loss": 0.6868, + "step": 41440 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046256052631578944, + "loss": 0.6992, + "step": 41450 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004624815789473684, + "loss": 0.7059, + "step": 41460 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046240263157894734, + "loss": 0.7006, + "step": 41470 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046232368421052623, + "loss": 0.7029, + "step": 41480 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046224473684210524, + "loss": 0.6921, + "step": 41490 + }, + { + "epoch": 0.41, + "learning_rate": 0.00046216578947368413, + "loss": 0.6974, + "step": 41500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046208684210526313, + "loss": 0.7081, + "step": 41510 + }, + { + "epoch": 0.42, + "learning_rate": 0.000462007894736842, + "loss": 0.6967, + "step": 41520 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046192894736842103, + "loss": 0.6858, + "step": 41530 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046185, + "loss": 0.6899, + "step": 41540 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046177105263157887, + "loss": 0.7043, + "step": 41550 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004616921052631579, + "loss": 0.7068, + "step": 41560 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046161315789473677, + "loss": 0.6884, + "step": 41570 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046153421052631577, + "loss": 0.6911, + "step": 41580 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046145526315789466, + "loss": 0.6755, + "step": 41590 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046137631578947367, + "loss": 0.6848, + "step": 41600 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046129736842105256, + "loss": 0.6875, + "step": 41610 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046121842105263156, + "loss": 0.6835, + "step": 41620 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004611394736842105, + "loss": 0.6804, + "step": 41630 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004610605263157894, + "loss": 0.7054, + "step": 41640 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004609815789473684, + "loss": 0.7074, + "step": 41650 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004609026315789473, + "loss": 0.7067, + "step": 41660 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004608236842105263, + "loss": 0.689, + "step": 41670 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004607447368421052, + "loss": 0.6833, + "step": 41680 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004606657894736842, + "loss": 0.694, + "step": 41690 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004605868421052631, + "loss": 0.6868, + "step": 41700 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046050789473684204, + "loss": 0.6911, + "step": 41710 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046042894736842105, + "loss": 0.6926, + "step": 41720 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046034999999999994, + "loss": 0.7007, + "step": 41730 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046027105263157894, + "loss": 0.6976, + "step": 41740 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046019210526315784, + "loss": 0.6939, + "step": 41750 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046011315789473684, + "loss": 0.6798, + "step": 41760 + }, + { + "epoch": 0.42, + "learning_rate": 0.00046003421052631573, + "loss": 0.6686, + "step": 41770 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004599552631578947, + "loss": 0.6657, + "step": 41780 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045987631578947363, + "loss": 0.6722, + "step": 41790 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004597973684210526, + "loss": 0.667, + "step": 41800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004597184210526316, + "loss": 0.702, + "step": 41810 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004596394736842105, + "loss": 0.6961, + "step": 41820 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004595605263157895, + "loss": 0.7083, + "step": 41830 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045948157894736837, + "loss": 0.7035, + "step": 41840 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004594026315789473, + "loss": 0.6927, + "step": 41850 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045932368421052627, + "loss": 0.7025, + "step": 41860 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004592447368421052, + "loss": 0.7172, + "step": 41870 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045916578947368417, + "loss": 0.689, + "step": 41880 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004590868421052631, + "loss": 0.7104, + "step": 41890 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004590078947368421, + "loss": 0.6763, + "step": 41900 + }, + { + "epoch": 0.42, + "learning_rate": 0.000458928947368421, + "loss": 0.6951, + "step": 41910 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045884999999999996, + "loss": 0.6861, + "step": 41920 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004587710526315789, + "loss": 0.6988, + "step": 41930 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045869210526315786, + "loss": 0.705, + "step": 41940 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004586131578947368, + "loss": 0.6976, + "step": 41950 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045853421052631575, + "loss": 0.6956, + "step": 41960 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045845526315789465, + "loss": 0.7138, + "step": 41970 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045837631578947365, + "loss": 0.6899, + "step": 41980 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045829736842105265, + "loss": 0.6924, + "step": 41990 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045821842105263155, + "loss": 0.6994, + "step": 42000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004581394736842105, + "loss": 0.6916, + "step": 42010 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045806052631578944, + "loss": 0.6946, + "step": 42020 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004579815789473684, + "loss": 0.6987, + "step": 42030 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045791052631578947, + "loss": 0.6817, + "step": 42040 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045783157894736836, + "loss": 0.6914, + "step": 42050 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004577526315789473, + "loss": 0.6875, + "step": 42060 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004576736842105263, + "loss": 0.6995, + "step": 42070 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004575947368421052, + "loss": 0.6953, + "step": 42080 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004575157894736842, + "loss": 0.6829, + "step": 42090 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004574368421052631, + "loss": 0.6764, + "step": 42100 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004573578947368421, + "loss": 0.6876, + "step": 42110 + }, + { + "epoch": 0.42, + "learning_rate": 0.000457278947368421, + "loss": 0.705, + "step": 42120 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045719999999999995, + "loss": 0.6856, + "step": 42130 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004571210526315789, + "loss": 0.6913, + "step": 42140 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045704210526315784, + "loss": 0.6964, + "step": 42150 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045696315789473684, + "loss": 0.6934, + "step": 42160 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045688421052631574, + "loss": 0.6899, + "step": 42170 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045680526315789474, + "loss": 0.693, + "step": 42180 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045672631578947364, + "loss": 0.684, + "step": 42190 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004566473684210526, + "loss": 0.6944, + "step": 42200 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045656842105263153, + "loss": 0.6775, + "step": 42210 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004564894736842105, + "loss": 0.6865, + "step": 42220 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045641052631578943, + "loss": 0.6876, + "step": 42230 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004563315789473684, + "loss": 0.6855, + "step": 42240 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004562526315789474, + "loss": 0.6881, + "step": 42250 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004561736842105263, + "loss": 0.6939, + "step": 42260 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004560947368421052, + "loss": 0.6921, + "step": 42270 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045601578947368417, + "loss": 0.6904, + "step": 42280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004559368421052631, + "loss": 0.6913, + "step": 42290 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045585789473684207, + "loss": 0.6889, + "step": 42300 + }, + { + "epoch": 0.42, + "learning_rate": 0.000455778947368421, + "loss": 0.675, + "step": 42310 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004556999999999999, + "loss": 0.6869, + "step": 42320 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004556210526315789, + "loss": 0.6844, + "step": 42330 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045554210526315786, + "loss": 0.6827, + "step": 42340 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004554631578947368, + "loss": 0.6916, + "step": 42350 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045538421052631576, + "loss": 0.6821, + "step": 42360 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004553052631578947, + "loss": 0.6998, + "step": 42370 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045522631578947365, + "loss": 0.6859, + "step": 42380 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045514736842105255, + "loss": 0.6947, + "step": 42390 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045506842105263155, + "loss": 0.685, + "step": 42400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045498947368421044, + "loss": 0.6817, + "step": 42410 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045491052631578945, + "loss": 0.6796, + "step": 42420 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004548315789473684, + "loss": 0.6856, + "step": 42430 + }, + { + "epoch": 0.42, + "learning_rate": 0.00045475263157894734, + "loss": 0.699, + "step": 42440 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004546736842105263, + "loss": 0.6971, + "step": 42450 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004545947368421052, + "loss": 0.6875, + "step": 42460 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004545157894736842, + "loss": 0.6906, + "step": 42470 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004544368421052631, + "loss": 0.6809, + "step": 42480 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004543578947368421, + "loss": 0.7047, + "step": 42490 + }, + { + "epoch": 0.42, + "learning_rate": 0.000454278947368421, + "loss": 0.6838, + "step": 42500 + }, + { + "epoch": 0.42, + "eval_accuracy": 0.8575258035447281, + "eval_loss": 0.677734375, + "eval_runtime": 96.7246, + "eval_samples_per_second": 827.09, + "eval_steps_per_second": 1.623, + "step": 42500 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004542, + "loss": 0.6802, + "step": 42510 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004541210526315789, + "loss": 0.6877, + "step": 42520 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004540421052631579, + "loss": 0.6861, + "step": 42530 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004539631578947368, + "loss": 0.6852, + "step": 42540 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004538842105263157, + "loss": 0.6799, + "step": 42550 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004538052631578947, + "loss": 0.6797, + "step": 42560 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004537263157894736, + "loss": 0.6812, + "step": 42570 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004536473684210526, + "loss": 0.6601, + "step": 42580 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004535684210526315, + "loss": 0.675, + "step": 42590 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004534894736842105, + "loss": 0.6841, + "step": 42600 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004534105263157894, + "loss": 0.6829, + "step": 42610 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045333157894736836, + "loss": 0.6653, + "step": 42620 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045325263157894736, + "loss": 0.6829, + "step": 42630 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045317368421052626, + "loss": 0.6849, + "step": 42640 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045309473684210526, + "loss": 0.6735, + "step": 42650 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045301578947368415, + "loss": 0.6897, + "step": 42660 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045293684210526315, + "loss": 0.6926, + "step": 42670 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045285789473684205, + "loss": 0.6934, + "step": 42680 + }, + { + "epoch": 0.43, + "learning_rate": 0.000452778947368421, + "loss": 0.6712, + "step": 42690 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045269999999999994, + "loss": 0.6946, + "step": 42700 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004526210526315789, + "loss": 0.673, + "step": 42710 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004525421052631579, + "loss": 0.6845, + "step": 42720 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004524631578947368, + "loss": 0.6793, + "step": 42730 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004523842105263158, + "loss": 0.6748, + "step": 42740 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004523052631578947, + "loss": 0.6857, + "step": 42750 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045222631578947363, + "loss": 0.6747, + "step": 42760 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004521473684210526, + "loss": 0.6798, + "step": 42770 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045206842105263153, + "loss": 0.6589, + "step": 42780 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004519894736842105, + "loss": 0.6727, + "step": 42790 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045191052631578943, + "loss": 0.6831, + "step": 42800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045183157894736843, + "loss": 0.6783, + "step": 42810 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004517526315789473, + "loss": 0.6891, + "step": 42820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045167368421052627, + "loss": 0.6791, + "step": 42830 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004515947368421052, + "loss": 0.674, + "step": 42840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045151578947368417, + "loss": 0.6715, + "step": 42850 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004514368421052631, + "loss": 0.6802, + "step": 42860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045135789473684207, + "loss": 0.6719, + "step": 42870 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045127894736842096, + "loss": 0.6785, + "step": 42880 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045119999999999996, + "loss": 0.6824, + "step": 42890 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045112105263157897, + "loss": 0.6907, + "step": 42900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045104210526315786, + "loss": 0.6926, + "step": 42910 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004509631578947368, + "loss": 0.6813, + "step": 42920 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045088421052631576, + "loss": 0.6878, + "step": 42930 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004508052631578947, + "loss": 0.6756, + "step": 42940 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004507263157894736, + "loss": 0.6811, + "step": 42950 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004506473684210526, + "loss": 0.6798, + "step": 42960 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004505684210526315, + "loss": 0.6847, + "step": 42970 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004504894736842105, + "loss": 0.6842, + "step": 42980 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045041052631578945, + "loss": 0.6751, + "step": 42990 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004503315789473684, + "loss": 0.6784, + "step": 43000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045025263157894734, + "loss": 0.6883, + "step": 43010 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004501736842105263, + "loss": 0.676, + "step": 43020 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045009473684210524, + "loss": 0.6871, + "step": 43030 + }, + { + "epoch": 0.43, + "learning_rate": 0.00045001578947368413, + "loss": 0.683, + "step": 43040 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004499447368421052, + "loss": 0.6733, + "step": 43050 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044986578947368416, + "loss": 0.684, + "step": 43060 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044978684210526316, + "loss": 0.6839, + "step": 43070 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044970789473684205, + "loss": 0.6992, + "step": 43080 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044962894736842106, + "loss": 0.6915, + "step": 43090 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044954999999999995, + "loss": 0.6869, + "step": 43100 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004494710526315789, + "loss": 0.6782, + "step": 43110 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044939210526315785, + "loss": 0.6847, + "step": 43120 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004493131578947368, + "loss": 0.6886, + "step": 43130 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044923421052631574, + "loss": 0.6848, + "step": 43140 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004491552631578947, + "loss": 0.6659, + "step": 43150 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004490763157894737, + "loss": 0.686, + "step": 43160 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004489973684210526, + "loss": 0.678, + "step": 43170 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044891842105263154, + "loss": 0.6871, + "step": 43180 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004488394736842105, + "loss": 0.6864, + "step": 43190 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044876052631578943, + "loss": 0.6782, + "step": 43200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004486815789473684, + "loss": 0.6772, + "step": 43210 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044860263157894733, + "loss": 0.6797, + "step": 43220 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004485236842105262, + "loss": 0.6743, + "step": 43230 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004484447368421052, + "loss": 0.6811, + "step": 43240 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004483657894736842, + "loss": 0.6851, + "step": 43250 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004482868421052631, + "loss": 0.6763, + "step": 43260 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044820789473684207, + "loss": 0.6883, + "step": 43270 + }, + { + "epoch": 0.43, + "learning_rate": 0.000448128947368421, + "loss": 0.6758, + "step": 43280 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044804999999999997, + "loss": 0.6941, + "step": 43290 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044797105263157886, + "loss": 0.6786, + "step": 43300 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044789210526315786, + "loss": 0.6783, + "step": 43310 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044781315789473676, + "loss": 0.6819, + "step": 43320 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044773421052631576, + "loss": 0.6831, + "step": 43330 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004476552631578947, + "loss": 0.6831, + "step": 43340 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044757631578947366, + "loss": 0.6886, + "step": 43350 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004474973684210526, + "loss": 0.6761, + "step": 43360 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004474184210526315, + "loss": 0.6773, + "step": 43370 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004473394736842105, + "loss": 0.684, + "step": 43380 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004472605263157894, + "loss": 0.6911, + "step": 43390 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004471815789473684, + "loss": 0.6846, + "step": 43400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004471026315789473, + "loss": 0.6824, + "step": 43410 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004470236842105263, + "loss": 0.679, + "step": 43420 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044694473684210524, + "loss": 0.6858, + "step": 43430 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004468657894736842, + "loss": 0.6865, + "step": 43440 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044678684210526314, + "loss": 0.6831, + "step": 43450 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044670789473684203, + "loss": 0.6761, + "step": 43460 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044662894736842104, + "loss": 0.6728, + "step": 43470 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044654999999999993, + "loss": 0.6763, + "step": 43480 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044647105263157893, + "loss": 0.6937, + "step": 43490 + }, + { + "epoch": 0.43, + "learning_rate": 0.00044639210526315783, + "loss": 0.6782, + "step": 43500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044631315789473683, + "loss": 0.6802, + "step": 43510 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004462342105263158, + "loss": 0.6843, + "step": 43520 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044615526315789467, + "loss": 0.6806, + "step": 43530 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004460763157894737, + "loss": 0.6702, + "step": 43540 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044599736842105257, + "loss": 0.673, + "step": 43550 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044591842105263157, + "loss": 0.6804, + "step": 43560 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044583947368421047, + "loss": 0.6717, + "step": 43570 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044576052631578947, + "loss": 0.6729, + "step": 43580 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044568157894736836, + "loss": 0.6647, + "step": 43590 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004456026315789473, + "loss": 0.666, + "step": 43600 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044552368421052626, + "loss": 0.6766, + "step": 43610 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004454447368421052, + "loss": 0.6738, + "step": 43620 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004453657894736842, + "loss": 0.6589, + "step": 43630 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004452868421052631, + "loss": 0.6726, + "step": 43640 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004452078947368421, + "loss": 0.6659, + "step": 43650 + }, + { + "epoch": 0.44, + "learning_rate": 0.000445128947368421, + "loss": 0.6782, + "step": 43660 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044504999999999995, + "loss": 0.6578, + "step": 43670 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004449710526315789, + "loss": 0.6786, + "step": 43680 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044489210526315785, + "loss": 0.6603, + "step": 43690 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004448131578947368, + "loss": 0.655, + "step": 43700 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044473421052631574, + "loss": 0.6451, + "step": 43710 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044465526315789474, + "loss": 0.6692, + "step": 43720 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044457631578947364, + "loss": 0.6575, + "step": 43730 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004444973684210526, + "loss": 0.6682, + "step": 43740 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044441842105263154, + "loss": 0.6701, + "step": 43750 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004443394736842105, + "loss": 0.681, + "step": 43760 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044426052631578943, + "loss": 0.6744, + "step": 43770 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004441815789473684, + "loss": 0.6838, + "step": 43780 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004441026315789473, + "loss": 0.6789, + "step": 43790 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004440236842105263, + "loss": 0.6821, + "step": 43800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004439447368421053, + "loss": 0.6697, + "step": 43810 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004438657894736842, + "loss": 0.6769, + "step": 43820 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004437868421052631, + "loss": 0.6616, + "step": 43830 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044370789473684207, + "loss": 0.6701, + "step": 43840 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004436368421052631, + "loss": 0.6669, + "step": 43850 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004435578947368421, + "loss": 0.6583, + "step": 43860 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044347894736842104, + "loss": 0.6521, + "step": 43870 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044339999999999994, + "loss": 0.6759, + "step": 43880 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044332105263157894, + "loss": 0.6624, + "step": 43890 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044324210526315783, + "loss": 0.6608, + "step": 43900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044316315789473683, + "loss": 0.6528, + "step": 43910 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044308421052631573, + "loss": 0.6748, + "step": 43920 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044300526315789473, + "loss": 0.6503, + "step": 43930 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004429263157894736, + "loss": 0.6682, + "step": 43940 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004428473684210526, + "loss": 0.6718, + "step": 43950 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004427684210526316, + "loss": 0.6745, + "step": 43960 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044268947368421047, + "loss": 0.6619, + "step": 43970 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004426105263157895, + "loss": 0.6703, + "step": 43980 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044253157894736837, + "loss": 0.6618, + "step": 43990 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044245263157894737, + "loss": 0.6658, + "step": 44000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044237368421052626, + "loss": 0.6844, + "step": 44010 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004422947368421052, + "loss": 0.6885, + "step": 44020 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044221578947368416, + "loss": 0.6581, + "step": 44030 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004421368421052631, + "loss": 0.6635, + "step": 44040 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004420578947368421, + "loss": 0.6656, + "step": 44050 + }, + { + "epoch": 0.44, + "learning_rate": 0.000441978947368421, + "loss": 0.6661, + "step": 44060 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004419, + "loss": 0.6651, + "step": 44070 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004418210526315789, + "loss": 0.6614, + "step": 44080 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044174210526315785, + "loss": 0.6768, + "step": 44090 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004416631578947368, + "loss": 0.6879, + "step": 44100 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044158421052631575, + "loss": 0.6754, + "step": 44110 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004415052631578947, + "loss": 0.6872, + "step": 44120 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044142631578947364, + "loss": 0.6782, + "step": 44130 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044134736842105254, + "loss": 0.6851, + "step": 44140 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044126842105263154, + "loss": 0.6729, + "step": 44150 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004411894736842105, + "loss": 0.691, + "step": 44160 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044111052631578944, + "loss": 0.6858, + "step": 44170 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004410315789473684, + "loss": 0.6821, + "step": 44180 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044095263157894733, + "loss": 0.6695, + "step": 44190 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004408736842105263, + "loss": 0.6793, + "step": 44200 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004407947368421052, + "loss": 0.6671, + "step": 44210 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004407157894736842, + "loss": 0.6732, + "step": 44220 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044063684210526307, + "loss": 0.6715, + "step": 44230 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004405578947368421, + "loss": 0.6761, + "step": 44240 + }, + { + "epoch": 0.44, + "learning_rate": 0.000440478947368421, + "loss": 0.6714, + "step": 44250 + }, + { + "epoch": 0.44, + "learning_rate": 0.00044039999999999997, + "loss": 0.6802, + "step": 44260 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004403210526315789, + "loss": 0.6855, + "step": 44270 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004402421052631578, + "loss": 0.6826, + "step": 44280 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004401631578947368, + "loss": 0.6783, + "step": 44290 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004400842105263157, + "loss": 0.6795, + "step": 44300 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004400052631578947, + "loss": 0.6872, + "step": 44310 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004399263157894736, + "loss": 0.6884, + "step": 44320 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004398473684210526, + "loss": 0.6806, + "step": 44330 + }, + { + "epoch": 0.44, + "learning_rate": 0.00043976842105263156, + "loss": 0.679, + "step": 44340 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004396894736842105, + "loss": 0.6755, + "step": 44350 + }, + { + "epoch": 0.44, + "learning_rate": 0.00043961052631578945, + "loss": 0.692, + "step": 44360 + }, + { + "epoch": 0.44, + "learning_rate": 0.00043953157894736835, + "loss": 0.6801, + "step": 44370 + }, + { + "epoch": 0.44, + "learning_rate": 0.00043945263157894735, + "loss": 0.6728, + "step": 44380 + }, + { + "epoch": 0.44, + "learning_rate": 0.00043937368421052624, + "loss": 0.675, + "step": 44390 + }, + { + "epoch": 0.44, + "learning_rate": 0.00043929473684210525, + "loss": 0.669, + "step": 44400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00043921578947368414, + "loss": 0.6866, + "step": 44410 + }, + { + "epoch": 0.44, + "learning_rate": 0.00043913684210526314, + "loss": 0.6831, + "step": 44420 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004390578947368421, + "loss": 0.6796, + "step": 44430 + }, + { + "epoch": 0.44, + "learning_rate": 0.000438978947368421, + "loss": 0.69, + "step": 44440 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004389, + "loss": 0.6708, + "step": 44450 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004388210526315789, + "loss": 0.6892, + "step": 44460 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004387421052631579, + "loss": 0.6883, + "step": 44470 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004386631578947368, + "loss": 0.6838, + "step": 44480 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004385842105263158, + "loss": 0.6874, + "step": 44490 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004385052631578947, + "loss": 0.7018, + "step": 44500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004384263157894736, + "loss": 0.6789, + "step": 44510 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004383473684210526, + "loss": 0.6831, + "step": 44520 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004382684210526315, + "loss": 0.6841, + "step": 44530 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004381894736842105, + "loss": 0.6891, + "step": 44540 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004381105263157894, + "loss": 0.676, + "step": 44550 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004380315789473684, + "loss": 0.6773, + "step": 44560 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004379526315789473, + "loss": 0.6804, + "step": 44570 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043787368421052626, + "loss": 0.6738, + "step": 44580 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004377947368421052, + "loss": 0.6774, + "step": 44590 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043771578947368416, + "loss": 0.6737, + "step": 44600 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043763684210526316, + "loss": 0.6809, + "step": 44610 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043755789473684206, + "loss": 0.6816, + "step": 44620 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043747894736842106, + "loss": 0.6745, + "step": 44630 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043739999999999995, + "loss": 0.6779, + "step": 44640 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004373210526315789, + "loss": 0.6817, + "step": 44650 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043724210526315785, + "loss": 0.6768, + "step": 44660 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004371631578947368, + "loss": 0.6703, + "step": 44670 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043708421052631575, + "loss": 0.674, + "step": 44680 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004370052631578947, + "loss": 0.6787, + "step": 44690 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004369263157894736, + "loss": 0.6882, + "step": 44700 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004368473684210526, + "loss": 0.6762, + "step": 44710 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004367684210526316, + "loss": 0.6687, + "step": 44720 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004366894736842105, + "loss": 0.6868, + "step": 44730 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043661052631578944, + "loss": 0.6633, + "step": 44740 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004365315789473684, + "loss": 0.6732, + "step": 44750 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043645263157894733, + "loss": 0.6595, + "step": 44760 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004363736842105262, + "loss": 0.6707, + "step": 44770 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043629473684210523, + "loss": 0.6553, + "step": 44780 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004362157894736841, + "loss": 0.6675, + "step": 44790 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004361368421052631, + "loss": 0.6593, + "step": 44800 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004360578947368421, + "loss": 0.6639, + "step": 44810 + }, + { + "epoch": 0.45, + "learning_rate": 0.000435978947368421, + "loss": 0.6538, + "step": 44820 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043589999999999997, + "loss": 0.6666, + "step": 44830 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004358210526315789, + "loss": 0.6583, + "step": 44840 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043574210526315787, + "loss": 0.6791, + "step": 44850 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043566315789473676, + "loss": 0.6504, + "step": 44860 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043558421052631576, + "loss": 0.6677, + "step": 44870 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043550526315789466, + "loss": 0.651, + "step": 44880 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043542631578947366, + "loss": 0.6632, + "step": 44890 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004353473684210526, + "loss": 0.6563, + "step": 44900 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043526842105263156, + "loss": 0.6656, + "step": 44910 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004351894736842105, + "loss": 0.6628, + "step": 44920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004351105263157894, + "loss": 0.6647, + "step": 44930 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004350315789473684, + "loss": 0.6707, + "step": 44940 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004349526315789473, + "loss": 0.675, + "step": 44950 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004348736842105263, + "loss": 0.6905, + "step": 44960 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004347947368421052, + "loss": 0.6732, + "step": 44970 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004347157894736842, + "loss": 0.6757, + "step": 44980 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043463684210526314, + "loss": 0.685, + "step": 44990 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043455789473684204, + "loss": 0.6752, + "step": 45000 + }, + { + "epoch": 0.45, + "eval_accuracy": 0.8599255154050108, + "eval_loss": 0.6640625, + "eval_runtime": 97.1461, + "eval_samples_per_second": 823.502, + "eval_steps_per_second": 1.616, + "step": 45000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043447894736842104, + "loss": 0.6799, + "step": 45010 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043439999999999993, + "loss": 0.6723, + "step": 45020 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043432105263157894, + "loss": 0.6637, + "step": 45030 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043424210526315783, + "loss": 0.6702, + "step": 45040 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043416315789473683, + "loss": 0.6607, + "step": 45050 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004340842105263157, + "loss": 0.6635, + "step": 45060 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004340052631578947, + "loss": 0.675, + "step": 45070 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004339263157894737, + "loss": 0.6741, + "step": 45080 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043384736842105257, + "loss": 0.6663, + "step": 45090 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004337684210526316, + "loss": 0.6532, + "step": 45100 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043368947368421047, + "loss": 0.6647, + "step": 45110 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043361052631578947, + "loss": 0.6682, + "step": 45120 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043353157894736837, + "loss": 0.658, + "step": 45130 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004334526315789473, + "loss": 0.6703, + "step": 45140 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043337368421052626, + "loss": 0.6698, + "step": 45150 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004332947368421052, + "loss": 0.6698, + "step": 45160 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004332157894736842, + "loss": 0.6666, + "step": 45170 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004331368421052631, + "loss": 0.6747, + "step": 45180 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004330578947368421, + "loss": 0.6839, + "step": 45190 + }, + { + "epoch": 0.45, + "learning_rate": 0.000432978947368421, + "loss": 0.6637, + "step": 45200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004329, + "loss": 0.6625, + "step": 45210 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004328210526315789, + "loss": 0.6749, + "step": 45220 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043274210526315785, + "loss": 0.6785, + "step": 45230 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004326631578947368, + "loss": 0.6743, + "step": 45240 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043258421052631574, + "loss": 0.6759, + "step": 45250 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043250526315789464, + "loss": 0.6694, + "step": 45260 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043242631578947364, + "loss": 0.6657, + "step": 45270 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043234736842105264, + "loss": 0.6585, + "step": 45280 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043226842105263154, + "loss": 0.668, + "step": 45290 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004321894736842105, + "loss": 0.6783, + "step": 45300 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043211052631578943, + "loss": 0.6774, + "step": 45310 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004320315789473684, + "loss": 0.674, + "step": 45320 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043195263157894733, + "loss": 0.6705, + "step": 45330 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004318736842105263, + "loss": 0.6672, + "step": 45340 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004317947368421052, + "loss": 0.6842, + "step": 45350 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004317157894736842, + "loss": 0.6807, + "step": 45360 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004316368421052631, + "loss": 0.6782, + "step": 45370 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043155789473684207, + "loss": 0.6739, + "step": 45380 + }, + { + "epoch": 0.45, + "learning_rate": 0.000431478947368421, + "loss": 0.6886, + "step": 45390 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043139999999999997, + "loss": 0.6809, + "step": 45400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004313210526315789, + "loss": 0.667, + "step": 45410 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004312421052631578, + "loss": 0.6836, + "step": 45420 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004311631578947368, + "loss": 0.6772, + "step": 45430 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004310842105263157, + "loss": 0.6763, + "step": 45440 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004310052631578947, + "loss": 0.6701, + "step": 45450 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043092631578947366, + "loss": 0.6785, + "step": 45460 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004308473684210526, + "loss": 0.6789, + "step": 45470 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043076842105263156, + "loss": 0.6808, + "step": 45480 + }, + { + "epoch": 0.45, + "learning_rate": 0.00043068947368421045, + "loss": 0.6843, + "step": 45490 + }, + { + "epoch": 0.46, + "learning_rate": 0.00043061052631578945, + "loss": 0.6633, + "step": 45500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00043053157894736835, + "loss": 0.6578, + "step": 45510 + }, + { + "epoch": 0.46, + "learning_rate": 0.00043045263157894735, + "loss": 0.6717, + "step": 45520 + }, + { + "epoch": 0.46, + "learning_rate": 0.00043037368421052624, + "loss": 0.669, + "step": 45530 + }, + { + "epoch": 0.46, + "learning_rate": 0.00043029473684210525, + "loss": 0.6861, + "step": 45540 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004302157894736842, + "loss": 0.6764, + "step": 45550 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004301368421052631, + "loss": 0.6799, + "step": 45560 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004300578947368421, + "loss": 0.6657, + "step": 45570 + }, + { + "epoch": 0.46, + "learning_rate": 0.000429978947368421, + "loss": 0.6633, + "step": 45580 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004299, + "loss": 0.6667, + "step": 45590 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004298210526315789, + "loss": 0.6721, + "step": 45600 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004297421052631579, + "loss": 0.6818, + "step": 45610 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004296631578947368, + "loss": 0.6824, + "step": 45620 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004295842105263157, + "loss": 0.6818, + "step": 45630 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042950526315789473, + "loss": 0.6684, + "step": 45640 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042943421052631575, + "loss": 0.6763, + "step": 45650 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004293552631578947, + "loss": 0.6786, + "step": 45660 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042927631578947365, + "loss": 0.6848, + "step": 45670 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004291973684210526, + "loss": 0.6851, + "step": 45680 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042911842105263154, + "loss": 0.6897, + "step": 45690 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042903947368421055, + "loss": 0.6916, + "step": 45700 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042896052631578944, + "loss": 0.6804, + "step": 45710 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004288815789473684, + "loss": 0.6782, + "step": 45720 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042880263157894734, + "loss": 0.6748, + "step": 45730 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004287236842105263, + "loss": 0.6799, + "step": 45740 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042864473684210523, + "loss": 0.6882, + "step": 45750 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004285657894736842, + "loss": 0.6796, + "step": 45760 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004284868421052631, + "loss": 0.6627, + "step": 45770 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004284078947368421, + "loss": 0.6646, + "step": 45780 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042832894736842097, + "loss": 0.6738, + "step": 45790 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042825, + "loss": 0.6697, + "step": 45800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004281710526315789, + "loss": 0.6758, + "step": 45810 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042809210526315787, + "loss": 0.6837, + "step": 45820 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004280131578947368, + "loss": 0.6833, + "step": 45830 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004279342105263157, + "loss": 0.6777, + "step": 45840 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004278552631578947, + "loss": 0.6806, + "step": 45850 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004277763157894736, + "loss": 0.6843, + "step": 45860 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004276973684210526, + "loss": 0.6858, + "step": 45870 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004276184210526315, + "loss": 0.6962, + "step": 45880 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004275394736842105, + "loss": 0.6877, + "step": 45890 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042746052631578946, + "loss": 0.684, + "step": 45900 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042738157894736835, + "loss": 0.6886, + "step": 45910 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042730263157894735, + "loss": 0.6726, + "step": 45920 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042722368421052625, + "loss": 0.6592, + "step": 45930 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042714473684210525, + "loss": 0.657, + "step": 45940 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042706578947368414, + "loss": 0.6604, + "step": 45950 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042698684210526315, + "loss": 0.6607, + "step": 45960 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042690789473684204, + "loss": 0.6795, + "step": 45970 + }, + { + "epoch": 0.46, + "learning_rate": 0.000426828947368421, + "loss": 0.6574, + "step": 45980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042675, + "loss": 0.6729, + "step": 45990 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004266710526315789, + "loss": 0.6701, + "step": 46000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004265921052631579, + "loss": 0.6795, + "step": 46010 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004265131578947368, + "loss": 0.6854, + "step": 46020 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004264342105263158, + "loss": 0.6835, + "step": 46030 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004263552631578947, + "loss": 0.6778, + "step": 46040 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004262763157894737, + "loss": 0.6796, + "step": 46050 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004261973684210526, + "loss": 0.6714, + "step": 46060 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004261184210526315, + "loss": 0.6824, + "step": 46070 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004260394736842105, + "loss": 0.6705, + "step": 46080 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004259605263157894, + "loss": 0.6843, + "step": 46090 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004258815789473684, + "loss": 0.6873, + "step": 46100 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004258026315789473, + "loss": 0.6683, + "step": 46110 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004257236842105263, + "loss": 0.662, + "step": 46120 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004256447368421052, + "loss": 0.6848, + "step": 46130 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042556578947368416, + "loss": 0.6905, + "step": 46140 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004254868421052631, + "loss": 0.6866, + "step": 46150 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042540789473684206, + "loss": 0.6808, + "step": 46160 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042532894736842106, + "loss": 0.6693, + "step": 46170 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042524999999999996, + "loss": 0.6554, + "step": 46180 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042517105263157896, + "loss": 0.6523, + "step": 46190 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042509210526315785, + "loss": 0.6718, + "step": 46200 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004250131578947368, + "loss": 0.6625, + "step": 46210 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042493421052631575, + "loss": 0.6728, + "step": 46220 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004248552631578947, + "loss": 0.6786, + "step": 46230 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042477631578947365, + "loss": 0.6808, + "step": 46240 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004246973684210526, + "loss": 0.6762, + "step": 46250 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004246184210526316, + "loss": 0.6669, + "step": 46260 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004245394736842105, + "loss": 0.6685, + "step": 46270 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042446052631578944, + "loss": 0.6691, + "step": 46280 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004243815789473684, + "loss": 0.6511, + "step": 46290 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042430263157894733, + "loss": 0.6455, + "step": 46300 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004242236842105263, + "loss": 0.6656, + "step": 46310 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042414473684210523, + "loss": 0.6635, + "step": 46320 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004240657894736841, + "loss": 0.6599, + "step": 46330 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042398684210526313, + "loss": 0.6679, + "step": 46340 + }, + { + "epoch": 0.46, + "learning_rate": 0.000423907894736842, + "loss": 0.6538, + "step": 46350 + }, + { + "epoch": 0.46, + "learning_rate": 0.000423828947368421, + "loss": 0.6692, + "step": 46360 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042375, + "loss": 0.6859, + "step": 46370 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004236710526315789, + "loss": 0.679, + "step": 46380 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042359210526315787, + "loss": 0.6792, + "step": 46390 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042351315789473676, + "loss": 0.6799, + "step": 46400 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042343421052631577, + "loss": 0.6776, + "step": 46410 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042335526315789466, + "loss": 0.6638, + "step": 46420 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042327631578947366, + "loss": 0.6603, + "step": 46430 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042319736842105256, + "loss": 0.6629, + "step": 46440 + }, + { + "epoch": 0.46, + "learning_rate": 0.00042311842105263156, + "loss": 0.6683, + "step": 46450 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004230394736842105, + "loss": 0.6739, + "step": 46460 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004229605263157894, + "loss": 0.6617, + "step": 46470 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004228815789473684, + "loss": 0.6792, + "step": 46480 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004228026315789473, + "loss": 0.6733, + "step": 46490 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004227236842105263, + "loss": 0.6617, + "step": 46500 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004226447368421052, + "loss": 0.6792, + "step": 46510 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004225657894736842, + "loss": 0.6797, + "step": 46520 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004224868421052631, + "loss": 0.6739, + "step": 46530 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004224078947368421, + "loss": 0.6763, + "step": 46540 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042232894736842104, + "loss": 0.6887, + "step": 46550 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042224999999999994, + "loss": 0.6853, + "step": 46560 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042217105263157894, + "loss": 0.665, + "step": 46570 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042209210526315783, + "loss": 0.6908, + "step": 46580 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042201315789473684, + "loss": 0.6696, + "step": 46590 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042193421052631573, + "loss": 0.6524, + "step": 46600 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042185526315789473, + "loss": 0.6785, + "step": 46610 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004217763157894736, + "loss": 0.6727, + "step": 46620 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004216973684210526, + "loss": 0.6731, + "step": 46630 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004216184210526316, + "loss": 0.6638, + "step": 46640 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042153947368421047, + "loss": 0.6859, + "step": 46650 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004214605263157895, + "loss": 0.6735, + "step": 46660 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042138157894736837, + "loss": 0.6754, + "step": 46670 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042130263157894737, + "loss": 0.6737, + "step": 46680 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042122368421052626, + "loss": 0.6727, + "step": 46690 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004211447368421052, + "loss": 0.6797, + "step": 46700 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042106578947368416, + "loss": 0.6881, + "step": 46710 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004209868421052631, + "loss": 0.6768, + "step": 46720 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004209078947368421, + "loss": 0.6828, + "step": 46730 + }, + { + "epoch": 0.47, + "learning_rate": 0.000420828947368421, + "loss": 0.6682, + "step": 46740 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042075, + "loss": 0.6918, + "step": 46750 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004206710526315789, + "loss": 0.6486, + "step": 46760 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042059210526315785, + "loss": 0.6764, + "step": 46770 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004205131578947368, + "loss": 0.6808, + "step": 46780 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042043421052631575, + "loss": 0.6777, + "step": 46790 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004203552631578947, + "loss": 0.6736, + "step": 46800 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042027631578947364, + "loss": 0.6823, + "step": 46810 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042019736842105265, + "loss": 0.6693, + "step": 46820 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042012631578947367, + "loss": 0.6722, + "step": 46830 + }, + { + "epoch": 0.47, + "learning_rate": 0.00042004736842105256, + "loss": 0.675, + "step": 46840 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041996842105263156, + "loss": 0.6845, + "step": 46850 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041988947368421046, + "loss": 0.6852, + "step": 46860 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041981052631578946, + "loss": 0.6726, + "step": 46870 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041973157894736835, + "loss": 0.6752, + "step": 46880 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004196526315789473, + "loss": 0.6716, + "step": 46890 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004195736842105263, + "loss": 0.6745, + "step": 46900 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004194947368421052, + "loss": 0.6786, + "step": 46910 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004194157894736842, + "loss": 0.671, + "step": 46920 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004193368421052631, + "loss": 0.6709, + "step": 46930 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004192578947368421, + "loss": 0.6724, + "step": 46940 + }, + { + "epoch": 0.47, + "learning_rate": 0.000419178947368421, + "loss": 0.6708, + "step": 46950 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004191, + "loss": 0.6692, + "step": 46960 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004190210526315789, + "loss": 0.6705, + "step": 46970 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041894210526315784, + "loss": 0.6702, + "step": 46980 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041886315789473684, + "loss": 0.6726, + "step": 46990 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041878421052631573, + "loss": 0.6681, + "step": 47000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041870526315789474, + "loss": 0.679, + "step": 47010 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041862631578947363, + "loss": 0.6913, + "step": 47020 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041854736842105263, + "loss": 0.6748, + "step": 47030 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041846842105263153, + "loss": 0.6694, + "step": 47040 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004183894736842105, + "loss": 0.6742, + "step": 47050 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004183105263157894, + "loss": 0.6874, + "step": 47060 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041823157894736837, + "loss": 0.6867, + "step": 47070 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004181526315789474, + "loss": 0.6798, + "step": 47080 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041807368421052627, + "loss": 0.6688, + "step": 47090 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041799473684210527, + "loss": 0.6667, + "step": 47100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041791578947368417, + "loss": 0.6644, + "step": 47110 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004178368421052631, + "loss": 0.6694, + "step": 47120 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041775789473684206, + "loss": 0.675, + "step": 47130 + }, + { + "epoch": 0.47, + "learning_rate": 0.000417678947368421, + "loss": 0.6737, + "step": 47140 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041759999999999996, + "loss": 0.6563, + "step": 47150 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004175210526315789, + "loss": 0.6591, + "step": 47160 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004174421052631579, + "loss": 0.6632, + "step": 47170 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004173631578947368, + "loss": 0.663, + "step": 47180 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041728421052631575, + "loss": 0.6693, + "step": 47190 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004172052631578947, + "loss": 0.6683, + "step": 47200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041712631578947365, + "loss": 0.6762, + "step": 47210 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004170473684210526, + "loss": 0.6671, + "step": 47220 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041696842105263155, + "loss": 0.6758, + "step": 47230 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041688947368421044, + "loss": 0.6842, + "step": 47240 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041681052631578944, + "loss": 0.6697, + "step": 47250 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004167315789473684, + "loss": 0.6771, + "step": 47260 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041665263157894734, + "loss": 0.6809, + "step": 47270 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004165736842105263, + "loss": 0.681, + "step": 47280 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041649473684210524, + "loss": 0.6644, + "step": 47290 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004164157894736842, + "loss": 0.6846, + "step": 47300 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004163368421052631, + "loss": 0.6707, + "step": 47310 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004162578947368421, + "loss": 0.6552, + "step": 47320 + }, + { + "epoch": 0.47, + "learning_rate": 0.000416178947368421, + "loss": 0.6606, + "step": 47330 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004161, + "loss": 0.6753, + "step": 47340 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004160210526315789, + "loss": 0.6682, + "step": 47350 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004159421052631579, + "loss": 0.6725, + "step": 47360 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004158631578947368, + "loss": 0.666, + "step": 47370 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004157842105263157, + "loss": 0.6741, + "step": 47380 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004157052631578947, + "loss": 0.6695, + "step": 47390 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004156263157894736, + "loss": 0.6607, + "step": 47400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004155473684210526, + "loss": 0.6789, + "step": 47410 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004154684210526315, + "loss": 0.6655, + "step": 47420 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004153894736842105, + "loss": 0.6711, + "step": 47430 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004153105263157894, + "loss": 0.6638, + "step": 47440 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004152315789473684, + "loss": 0.6634, + "step": 47450 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041515263157894736, + "loss": 0.6739, + "step": 47460 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041507368421052625, + "loss": 0.6712, + "step": 47470 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041499473684210525, + "loss": 0.6556, + "step": 47480 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041491578947368415, + "loss": 0.6629, + "step": 47490 + }, + { + "epoch": 0.47, + "learning_rate": 0.00041483684210526315, + "loss": 0.6603, + "step": 47500 + }, + { + "epoch": 0.47, + "eval_accuracy": 0.8611843722171862, + "eval_loss": 0.65625, + "eval_runtime": 97.2871, + "eval_samples_per_second": 822.308, + "eval_steps_per_second": 1.614, + "step": 47500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041475789473684204, + "loss": 0.6665, + "step": 47510 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041467894736842105, + "loss": 0.6721, + "step": 47520 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041459999999999994, + "loss": 0.6617, + "step": 47530 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004145210526315789, + "loss": 0.662, + "step": 47540 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004144421052631579, + "loss": 0.6685, + "step": 47550 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004143631578947368, + "loss": 0.665, + "step": 47560 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004142842105263158, + "loss": 0.6763, + "step": 47570 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004142052631578947, + "loss": 0.6729, + "step": 47580 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004141263157894737, + "loss": 0.6688, + "step": 47590 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004140473684210526, + "loss": 0.6775, + "step": 47600 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004139684210526315, + "loss": 0.6608, + "step": 47610 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004138894736842105, + "loss": 0.6663, + "step": 47620 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004138105263157894, + "loss": 0.681, + "step": 47630 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004137315789473684, + "loss": 0.6751, + "step": 47640 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004136526315789473, + "loss": 0.6745, + "step": 47650 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004135736842105263, + "loss": 0.6808, + "step": 47660 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004134947368421052, + "loss": 0.6837, + "step": 47670 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041341578947368416, + "loss": 0.6747, + "step": 47680 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004133368421052631, + "loss": 0.6863, + "step": 47690 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041325789473684206, + "loss": 0.6741, + "step": 47700 + }, + { + "epoch": 0.48, + "learning_rate": 0.000413178947368421, + "loss": 0.6686, + "step": 47710 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041309999999999996, + "loss": 0.6508, + "step": 47720 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041302105263157896, + "loss": 0.6655, + "step": 47730 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041294210526315785, + "loss": 0.6534, + "step": 47740 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004128631578947368, + "loss": 0.6614, + "step": 47750 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041278421052631575, + "loss": 0.6596, + "step": 47760 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004127052631578947, + "loss": 0.6649, + "step": 47770 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041262631578947365, + "loss": 0.6662, + "step": 47780 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004125473684210526, + "loss": 0.6594, + "step": 47790 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004124684210526315, + "loss": 0.6673, + "step": 47800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004123894736842105, + "loss": 0.6615, + "step": 47810 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004123105263157895, + "loss": 0.6646, + "step": 47820 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004122315789473684, + "loss": 0.6659, + "step": 47830 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041215263157894734, + "loss": 0.6614, + "step": 47840 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004120736842105263, + "loss": 0.6622, + "step": 47850 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041199473684210523, + "loss": 0.6761, + "step": 47860 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041191578947368413, + "loss": 0.6659, + "step": 47870 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041183684210526313, + "loss": 0.6695, + "step": 47880 + }, + { + "epoch": 0.48, + "learning_rate": 0.000411757894736842, + "loss": 0.6684, + "step": 47890 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041167894736842103, + "loss": 0.6677, + "step": 47900 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004116, + "loss": 0.6579, + "step": 47910 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004115210526315789, + "loss": 0.6563, + "step": 47920 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041144210526315787, + "loss": 0.6725, + "step": 47930 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004113631578947368, + "loss": 0.6655, + "step": 47940 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041128421052631577, + "loss": 0.658, + "step": 47950 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041120526315789466, + "loss": 0.683, + "step": 47960 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041112631578947367, + "loss": 0.6687, + "step": 47970 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041104736842105256, + "loss": 0.6673, + "step": 47980 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041096842105263156, + "loss": 0.6745, + "step": 47990 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041088947368421046, + "loss": 0.6717, + "step": 48000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041081052631578946, + "loss": 0.6718, + "step": 48010 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004107315789473684, + "loss": 0.6721, + "step": 48020 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004106526315789473, + "loss": 0.6806, + "step": 48030 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004105736842105263, + "loss": 0.677, + "step": 48040 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004104947368421052, + "loss": 0.6742, + "step": 48050 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004104157894736842, + "loss": 0.6732, + "step": 48060 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004103368421052631, + "loss": 0.6678, + "step": 48070 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004102578947368421, + "loss": 0.6677, + "step": 48080 + }, + { + "epoch": 0.48, + "learning_rate": 0.000410178947368421, + "loss": 0.6641, + "step": 48090 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041009999999999994, + "loss": 0.68, + "step": 48100 + }, + { + "epoch": 0.48, + "learning_rate": 0.00041002105263157894, + "loss": 0.656, + "step": 48110 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040994210526315784, + "loss": 0.6642, + "step": 48120 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040986315789473684, + "loss": 0.6736, + "step": 48130 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040978421052631573, + "loss": 0.6775, + "step": 48140 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040970526315789474, + "loss": 0.6898, + "step": 48150 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040962631578947363, + "loss": 0.6685, + "step": 48160 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004095473684210526, + "loss": 0.6727, + "step": 48170 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004094684210526315, + "loss": 0.6671, + "step": 48180 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004093894736842105, + "loss": 0.6635, + "step": 48190 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004093105263157895, + "loss": 0.6664, + "step": 48200 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040923157894736837, + "loss": 0.6647, + "step": 48210 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004091526315789474, + "loss": 0.6597, + "step": 48220 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040907368421052627, + "loss": 0.6663, + "step": 48230 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004089947368421052, + "loss": 0.6622, + "step": 48240 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040891578947368416, + "loss": 0.6684, + "step": 48250 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004088368421052631, + "loss": 0.6645, + "step": 48260 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040875789473684206, + "loss": 0.6604, + "step": 48270 + }, + { + "epoch": 0.48, + "learning_rate": 0.000408678947368421, + "loss": 0.6632, + "step": 48280 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004086, + "loss": 0.6635, + "step": 48290 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004085210526315789, + "loss": 0.6639, + "step": 48300 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004084421052631579, + "loss": 0.6704, + "step": 48310 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004083631578947368, + "loss": 0.6704, + "step": 48320 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040828421052631575, + "loss": 0.6678, + "step": 48330 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004082052631578947, + "loss": 0.6555, + "step": 48340 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040812631578947365, + "loss": 0.6691, + "step": 48350 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040804736842105254, + "loss": 0.6684, + "step": 48360 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040796842105263154, + "loss": 0.6829, + "step": 48370 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040788947368421055, + "loss": 0.6842, + "step": 48380 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040781052631578944, + "loss": 0.6686, + "step": 48390 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004077315789473684, + "loss": 0.6714, + "step": 48400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040765263157894734, + "loss": 0.6656, + "step": 48410 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004075736842105263, + "loss": 0.6594, + "step": 48420 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040749473684210523, + "loss": 0.6602, + "step": 48430 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004074157894736842, + "loss": 0.6531, + "step": 48440 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004073368421052631, + "loss": 0.6682, + "step": 48450 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004072578947368421, + "loss": 0.6538, + "step": 48460 + }, + { + "epoch": 0.48, + "learning_rate": 0.000407178947368421, + "loss": 0.6633, + "step": 48470 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004071, + "loss": 0.6666, + "step": 48480 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004070210526315789, + "loss": 0.677, + "step": 48490 + }, + { + "epoch": 0.48, + "learning_rate": 0.00040694210526315787, + "loss": 0.6798, + "step": 48500 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004068631578947368, + "loss": 0.6707, + "step": 48510 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004067842105263157, + "loss": 0.6508, + "step": 48520 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004067052631578947, + "loss": 0.6545, + "step": 48530 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004066263157894736, + "loss": 0.6535, + "step": 48540 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004065473684210526, + "loss": 0.655, + "step": 48550 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040646842105263156, + "loss": 0.6534, + "step": 48560 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004063894736842105, + "loss": 0.6458, + "step": 48570 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040631052631578946, + "loss": 0.6362, + "step": 48580 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040623157894736835, + "loss": 0.6534, + "step": 48590 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040615263157894735, + "loss": 0.6359, + "step": 48600 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040607368421052625, + "loss": 0.6409, + "step": 48610 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040599473684210525, + "loss": 0.6255, + "step": 48620 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040591578947368415, + "loss": 0.6389, + "step": 48630 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040583684210526315, + "loss": 0.6409, + "step": 48640 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040575789473684204, + "loss": 0.6467, + "step": 48650 + }, + { + "epoch": 0.49, + "learning_rate": 0.000405678947368421, + "loss": 0.6451, + "step": 48660 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004056, + "loss": 0.6524, + "step": 48670 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004055210526315789, + "loss": 0.6358, + "step": 48680 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004054421052631579, + "loss": 0.6621, + "step": 48690 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004053631578947368, + "loss": 0.6634, + "step": 48700 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004052842105263158, + "loss": 0.6623, + "step": 48710 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004052052631578947, + "loss": 0.674, + "step": 48720 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004051342105263158, + "loss": 0.6669, + "step": 48730 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004050552631578947, + "loss": 0.6658, + "step": 48740 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040497631578947365, + "loss": 0.666, + "step": 48750 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004048973684210526, + "loss": 0.67, + "step": 48760 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040481842105263155, + "loss": 0.6487, + "step": 48770 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040473947368421044, + "loss": 0.6497, + "step": 48780 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040466052631578944, + "loss": 0.6421, + "step": 48790 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040458157894736834, + "loss": 0.6627, + "step": 48800 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040450263157894734, + "loss": 0.6718, + "step": 48810 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004044236842105263, + "loss": 0.6636, + "step": 48820 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040434473684210524, + "loss": 0.6705, + "step": 48830 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004042657894736842, + "loss": 0.6773, + "step": 48840 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040418684210526313, + "loss": 0.6702, + "step": 48850 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004041078947368421, + "loss": 0.658, + "step": 48860 + }, + { + "epoch": 0.49, + "learning_rate": 0.000404028947368421, + "loss": 0.6717, + "step": 48870 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040395, + "loss": 0.6848, + "step": 48880 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004038710526315789, + "loss": 0.6769, + "step": 48890 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004037921052631579, + "loss": 0.6567, + "step": 48900 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004037131578947368, + "loss": 0.6604, + "step": 48910 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040363421052631577, + "loss": 0.6588, + "step": 48920 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004035552631578947, + "loss": 0.6665, + "step": 48930 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004034763157894736, + "loss": 0.6531, + "step": 48940 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004033973684210526, + "loss": 0.6615, + "step": 48950 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004033184210526315, + "loss": 0.6544, + "step": 48960 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004032394736842105, + "loss": 0.6442, + "step": 48970 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004031605263157894, + "loss": 0.6489, + "step": 48980 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004030815789473684, + "loss": 0.6303, + "step": 48990 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040300263157894736, + "loss": 0.6541, + "step": 49000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040292368421052625, + "loss": 0.6591, + "step": 49010 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040284473684210526, + "loss": 0.6793, + "step": 49020 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040276578947368415, + "loss": 0.6784, + "step": 49030 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040268684210526315, + "loss": 0.658, + "step": 49040 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040260789473684205, + "loss": 0.6489, + "step": 49050 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040252894736842105, + "loss": 0.6641, + "step": 49060 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040244999999999994, + "loss": 0.6475, + "step": 49070 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004023710526315789, + "loss": 0.6513, + "step": 49080 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040229210526315784, + "loss": 0.6385, + "step": 49090 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004022131578947368, + "loss": 0.6425, + "step": 49100 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004021342105263158, + "loss": 0.6633, + "step": 49110 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004020552631578947, + "loss": 0.6402, + "step": 49120 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004019763157894737, + "loss": 0.644, + "step": 49130 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004018973684210526, + "loss": 0.665, + "step": 49140 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040181842105263153, + "loss": 0.6602, + "step": 49150 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004017394736842105, + "loss": 0.6461, + "step": 49160 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004016605263157894, + "loss": 0.6612, + "step": 49170 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004015815789473684, + "loss": 0.6735, + "step": 49180 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004015026315789473, + "loss": 0.6647, + "step": 49190 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004014236842105263, + "loss": 0.6665, + "step": 49200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004013447368421052, + "loss": 0.6698, + "step": 49210 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004012657894736842, + "loss": 0.6686, + "step": 49220 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004011868421052631, + "loss": 0.6732, + "step": 49230 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040110789473684206, + "loss": 0.6715, + "step": 49240 + }, + { + "epoch": 0.49, + "learning_rate": 0.000401028947368421, + "loss": 0.6625, + "step": 49250 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040094999999999996, + "loss": 0.6672, + "step": 49260 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040087105263157886, + "loss": 0.6581, + "step": 49270 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040079210526315786, + "loss": 0.6664, + "step": 49280 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040071315789473686, + "loss": 0.6461, + "step": 49290 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040063421052631575, + "loss": 0.6499, + "step": 49300 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004005552631578947, + "loss": 0.6513, + "step": 49310 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040047631578947365, + "loss": 0.65, + "step": 49320 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004003973684210526, + "loss": 0.6534, + "step": 49330 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040031842105263155, + "loss": 0.6588, + "step": 49340 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004002394736842105, + "loss": 0.6739, + "step": 49350 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004001605263157894, + "loss": 0.667, + "step": 49360 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004000815789473684, + "loss": 0.6504, + "step": 49370 + }, + { + "epoch": 0.49, + "learning_rate": 0.00040000263157894734, + "loss": 0.6691, + "step": 49380 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003999236842105263, + "loss": 0.66, + "step": 49390 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039984473684210524, + "loss": 0.6655, + "step": 49400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003997657894736842, + "loss": 0.6522, + "step": 49410 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039968684210526313, + "loss": 0.6568, + "step": 49420 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039960789473684203, + "loss": 0.6421, + "step": 49430 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039952894736842103, + "loss": 0.6648, + "step": 49440 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003994499999999999, + "loss": 0.6596, + "step": 49450 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003993710526315789, + "loss": 0.6566, + "step": 49460 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003992921052631579, + "loss": 0.6488, + "step": 49470 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003992131578947368, + "loss": 0.6351, + "step": 49480 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039913421052631577, + "loss": 0.6388, + "step": 49490 + }, + { + "epoch": 0.49, + "learning_rate": 0.00039905526315789467, + "loss": 0.6648, + "step": 49500 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039897631578947367, + "loss": 0.6592, + "step": 49510 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039889736842105256, + "loss": 0.6631, + "step": 49520 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039881842105263157, + "loss": 0.663, + "step": 49530 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039873947368421046, + "loss": 0.6561, + "step": 49540 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039866052631578946, + "loss": 0.6616, + "step": 49550 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003985815789473684, + "loss": 0.6621, + "step": 49560 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003985026315789473, + "loss": 0.6695, + "step": 49570 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003984236842105263, + "loss": 0.6611, + "step": 49580 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003983447368421052, + "loss": 0.6498, + "step": 49590 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003982657894736842, + "loss": 0.6701, + "step": 49600 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003981868421052631, + "loss": 0.6551, + "step": 49610 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003981078947368421, + "loss": 0.6552, + "step": 49620 + }, + { + "epoch": 0.5, + "learning_rate": 0.000398028947368421, + "loss": 0.6551, + "step": 49630 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039794999999999994, + "loss": 0.6551, + "step": 49640 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003978710526315789, + "loss": 0.6727, + "step": 49650 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039779210526315784, + "loss": 0.6609, + "step": 49660 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039771315789473684, + "loss": 0.6655, + "step": 49670 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039763421052631574, + "loss": 0.659, + "step": 49680 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039755526315789474, + "loss": 0.6646, + "step": 49690 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039747631578947363, + "loss": 0.6639, + "step": 49700 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039739736842105263, + "loss": 0.6631, + "step": 49710 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039731842105263153, + "loss": 0.6661, + "step": 49720 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003972394736842105, + "loss": 0.6596, + "step": 49730 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003971605263157894, + "loss": 0.6568, + "step": 49740 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003970815789473684, + "loss": 0.6572, + "step": 49750 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003970026315789474, + "loss": 0.6661, + "step": 49760 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039692368421052627, + "loss": 0.6573, + "step": 49770 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039684473684210527, + "loss": 0.6563, + "step": 49780 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039676578947368417, + "loss": 0.6427, + "step": 49790 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003966868421052631, + "loss": 0.6553, + "step": 49800 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039660789473684206, + "loss": 0.6391, + "step": 49810 + }, + { + "epoch": 0.5, + "learning_rate": 0.000396528947368421, + "loss": 0.6505, + "step": 49820 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039644999999999996, + "loss": 0.6568, + "step": 49830 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003963710526315789, + "loss": 0.6554, + "step": 49840 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003962921052631579, + "loss": 0.6532, + "step": 49850 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003962131578947368, + "loss": 0.6553, + "step": 49860 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039613421052631575, + "loss": 0.6671, + "step": 49870 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003960552631578947, + "loss": 0.6608, + "step": 49880 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039597631578947365, + "loss": 0.6586, + "step": 49890 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003958973684210526, + "loss": 0.6551, + "step": 49900 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039581842105263155, + "loss": 0.6553, + "step": 49910 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039573947368421044, + "loss": 0.6585, + "step": 49920 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039566052631578944, + "loss": 0.6604, + "step": 49930 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003955815789473684, + "loss": 0.6602, + "step": 49940 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039550263157894734, + "loss": 0.6503, + "step": 49950 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003954236842105263, + "loss": 0.6615, + "step": 49960 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039534473684210524, + "loss": 0.6618, + "step": 49970 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003952657894736842, + "loss": 0.6545, + "step": 49980 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003951868421052631, + "loss": 0.6545, + "step": 49990 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003951078947368421, + "loss": 0.6467, + "step": 50000 + }, + { + "epoch": 0.5, + "eval_accuracy": 0.8627790655085218, + "eval_loss": 0.6474609375, + "eval_runtime": 97.2319, + "eval_samples_per_second": 822.775, + "eval_steps_per_second": 1.615, + "step": 50000 + }, + { + "epoch": 0.5, + "learning_rate": 0.000395028947368421, + "loss": 0.6478, + "step": 50010 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039495, + "loss": 0.6406, + "step": 50020 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003948710526315789, + "loss": 0.6548, + "step": 50030 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003947921052631579, + "loss": 0.6629, + "step": 50040 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003947131578947368, + "loss": 0.6648, + "step": 50050 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003946342105263157, + "loss": 0.6518, + "step": 50060 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003945552631578947, + "loss": 0.6682, + "step": 50070 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003944763157894736, + "loss": 0.6668, + "step": 50080 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003943973684210526, + "loss": 0.6572, + "step": 50090 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003943184210526315, + "loss": 0.6682, + "step": 50100 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003942394736842105, + "loss": 0.6581, + "step": 50110 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039416052631578946, + "loss": 0.644, + "step": 50120 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039408157894736835, + "loss": 0.6578, + "step": 50130 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039400263157894736, + "loss": 0.6699, + "step": 50140 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039392368421052625, + "loss": 0.6599, + "step": 50150 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039384473684210525, + "loss": 0.6607, + "step": 50160 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039376578947368415, + "loss": 0.6576, + "step": 50170 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039368684210526315, + "loss": 0.6477, + "step": 50180 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039360789473684204, + "loss": 0.6685, + "step": 50190 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039352894736842105, + "loss": 0.6593, + "step": 50200 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039345, + "loss": 0.6451, + "step": 50210 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003933710526315789, + "loss": 0.6577, + "step": 50220 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003932921052631579, + "loss": 0.6486, + "step": 50230 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003932131578947368, + "loss": 0.6487, + "step": 50240 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003931342105263158, + "loss": 0.6562, + "step": 50250 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003930552631578947, + "loss": 0.6566, + "step": 50260 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003929763157894737, + "loss": 0.6451, + "step": 50270 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003928973684210526, + "loss": 0.6526, + "step": 50280 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039281842105263153, + "loss": 0.65, + "step": 50290 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003927394736842105, + "loss": 0.6615, + "step": 50300 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003926605263157894, + "loss": 0.6484, + "step": 50310 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003925815789473684, + "loss": 0.6562, + "step": 50320 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003925026315789473, + "loss": 0.6456, + "step": 50330 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003924236842105263, + "loss": 0.6401, + "step": 50340 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003923447368421052, + "loss": 0.6466, + "step": 50350 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039226578947368417, + "loss": 0.656, + "step": 50360 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003921868421052631, + "loss": 0.6617, + "step": 50370 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039210789473684206, + "loss": 0.6593, + "step": 50380 + }, + { + "epoch": 0.5, + "learning_rate": 0.000392028947368421, + "loss": 0.6499, + "step": 50390 + }, + { + "epoch": 0.5, + "learning_rate": 0.00039194999999999996, + "loss": 0.6606, + "step": 50400 + }, + { + "epoch": 0.5, + "learning_rate": 0.000391878947368421, + "loss": 0.6769, + "step": 50410 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003918, + "loss": 0.6655, + "step": 50420 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003917210526315789, + "loss": 0.6597, + "step": 50430 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003916421052631579, + "loss": 0.6629, + "step": 50440 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003915631578947368, + "loss": 0.6632, + "step": 50450 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003914842105263158, + "loss": 0.6738, + "step": 50460 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003914052631578947, + "loss": 0.6579, + "step": 50470 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003913263157894736, + "loss": 0.6583, + "step": 50480 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003912473684210526, + "loss": 0.6537, + "step": 50490 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003911684210526315, + "loss": 0.6484, + "step": 50500 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003910894736842105, + "loss": 0.657, + "step": 50510 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003910105263157894, + "loss": 0.6687, + "step": 50520 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003909315789473684, + "loss": 0.6594, + "step": 50530 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003908526315789473, + "loss": 0.6434, + "step": 50540 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039077368421052626, + "loss": 0.6427, + "step": 50550 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039069473684210526, + "loss": 0.6594, + "step": 50560 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039061578947368415, + "loss": 0.6624, + "step": 50570 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039053684210526316, + "loss": 0.6478, + "step": 50580 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039045789473684205, + "loss": 0.6563, + "step": 50590 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039037894736842105, + "loss": 0.6623, + "step": 50600 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039029999999999995, + "loss": 0.645, + "step": 50610 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039022105263157895, + "loss": 0.6657, + "step": 50620 + }, + { + "epoch": 0.51, + "learning_rate": 0.00039014210526315784, + "loss": 0.6548, + "step": 50630 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003900631578947368, + "loss": 0.6546, + "step": 50640 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003899842105263158, + "loss": 0.65, + "step": 50650 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003899052631578947, + "loss": 0.6426, + "step": 50660 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003898263157894737, + "loss": 0.6462, + "step": 50670 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003897473684210526, + "loss": 0.647, + "step": 50680 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003896684210526316, + "loss": 0.6356, + "step": 50690 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003895894736842105, + "loss": 0.6479, + "step": 50700 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038951052631578943, + "loss": 0.6553, + "step": 50710 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003894315789473684, + "loss": 0.6536, + "step": 50720 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003893526315789473, + "loss": 0.6348, + "step": 50730 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003892736842105263, + "loss": 0.639, + "step": 50740 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003891947368421052, + "loss": 0.6415, + "step": 50750 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003891157894736842, + "loss": 0.6507, + "step": 50760 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003890368421052631, + "loss": 0.6582, + "step": 50770 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038895789473684207, + "loss": 0.6578, + "step": 50780 + }, + { + "epoch": 0.51, + "learning_rate": 0.000388878947368421, + "loss": 0.6626, + "step": 50790 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038879999999999996, + "loss": 0.6634, + "step": 50800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003887210526315789, + "loss": 0.6446, + "step": 50810 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038864210526315786, + "loss": 0.6516, + "step": 50820 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038856315789473675, + "loss": 0.6617, + "step": 50830 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038848421052631576, + "loss": 0.65, + "step": 50840 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003884052631578947, + "loss": 0.6599, + "step": 50850 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038832631578947365, + "loss": 0.6413, + "step": 50860 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003882473684210526, + "loss": 0.6392, + "step": 50870 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038816842105263155, + "loss": 0.6411, + "step": 50880 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003880894736842105, + "loss": 0.648, + "step": 50890 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003880105263157894, + "loss": 0.6573, + "step": 50900 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003879315789473684, + "loss": 0.6687, + "step": 50910 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003878526315789473, + "loss": 0.6561, + "step": 50920 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003877736842105263, + "loss": 0.6528, + "step": 50930 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038769473684210524, + "loss": 0.6566, + "step": 50940 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003876157894736842, + "loss": 0.6565, + "step": 50950 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038753684210526314, + "loss": 0.655, + "step": 50960 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038745789473684203, + "loss": 0.6358, + "step": 50970 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038737894736842103, + "loss": 0.6547, + "step": 50980 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038729999999999993, + "loss": 0.6545, + "step": 50990 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038722105263157893, + "loss": 0.641, + "step": 51000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003871421052631578, + "loss": 0.6476, + "step": 51010 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003870631578947368, + "loss": 0.6519, + "step": 51020 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003869842105263158, + "loss": 0.6684, + "step": 51030 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003869052631578947, + "loss": 0.64, + "step": 51040 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038682631578947367, + "loss": 0.6486, + "step": 51050 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038674736842105257, + "loss": 0.6557, + "step": 51060 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038666842105263157, + "loss": 0.6658, + "step": 51070 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038658947368421046, + "loss": 0.6544, + "step": 51080 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038651052631578946, + "loss": 0.6711, + "step": 51090 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038643157894736836, + "loss": 0.6641, + "step": 51100 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038635263157894736, + "loss": 0.6559, + "step": 51110 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003862736842105263, + "loss": 0.6534, + "step": 51120 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003861947368421052, + "loss": 0.6714, + "step": 51130 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003861157894736842, + "loss": 0.6672, + "step": 51140 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003860368421052631, + "loss": 0.6625, + "step": 51150 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003859578947368421, + "loss": 0.6477, + "step": 51160 + }, + { + "epoch": 0.51, + "learning_rate": 0.000385878947368421, + "loss": 0.6416, + "step": 51170 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003858, + "loss": 0.6352, + "step": 51180 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003857210526315789, + "loss": 0.6331, + "step": 51190 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038564210526315784, + "loss": 0.644, + "step": 51200 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038556315789473684, + "loss": 0.6553, + "step": 51210 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038548421052631574, + "loss": 0.655, + "step": 51220 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038540526315789474, + "loss": 0.6516, + "step": 51230 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038532631578947363, + "loss": 0.658, + "step": 51240 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038524736842105264, + "loss": 0.6545, + "step": 51250 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038516842105263153, + "loss": 0.6655, + "step": 51260 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003850894736842105, + "loss": 0.66, + "step": 51270 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038501052631578943, + "loss": 0.6617, + "step": 51280 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003849315789473684, + "loss": 0.6322, + "step": 51290 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003848526315789474, + "loss": 0.6551, + "step": 51300 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003847736842105263, + "loss": 0.6413, + "step": 51310 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003846947368421053, + "loss": 0.6444, + "step": 51320 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038461578947368417, + "loss": 0.6408, + "step": 51330 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003845368421052631, + "loss": 0.643, + "step": 51340 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038445789473684207, + "loss": 0.6361, + "step": 51350 + }, + { + "epoch": 0.51, + "learning_rate": 0.000384378947368421, + "loss": 0.6386, + "step": 51360 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038429999999999996, + "loss": 0.6516, + "step": 51370 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003842210526315789, + "loss": 0.6461, + "step": 51380 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003841421052631578, + "loss": 0.6445, + "step": 51390 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003840631578947368, + "loss": 0.652, + "step": 51400 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038398421052631576, + "loss": 0.646, + "step": 51410 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003839052631578947, + "loss": 0.6523, + "step": 51420 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038382631578947365, + "loss": 0.651, + "step": 51430 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003837473684210526, + "loss": 0.6525, + "step": 51440 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038366842105263155, + "loss": 0.6591, + "step": 51450 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038358947368421044, + "loss": 0.6548, + "step": 51460 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038351052631578945, + "loss": 0.6574, + "step": 51470 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038343157894736834, + "loss": 0.6645, + "step": 51480 + }, + { + "epoch": 0.51, + "learning_rate": 0.00038335263157894734, + "loss": 0.6542, + "step": 51490 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003832736842105263, + "loss": 0.6521, + "step": 51500 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038319473684210524, + "loss": 0.6574, + "step": 51510 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003831157894736842, + "loss": 0.6604, + "step": 51520 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038303684210526314, + "loss": 0.6465, + "step": 51530 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003829578947368421, + "loss": 0.6578, + "step": 51540 + }, + { + "epoch": 0.52, + "learning_rate": 0.000382878947368421, + "loss": 0.6521, + "step": 51550 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003828, + "loss": 0.6559, + "step": 51560 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003827210526315789, + "loss": 0.6621, + "step": 51570 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003826421052631579, + "loss": 0.6728, + "step": 51580 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003825631578947368, + "loss": 0.663, + "step": 51590 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003824842105263158, + "loss": 0.6561, + "step": 51600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003824052631578947, + "loss": 0.653, + "step": 51610 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003823263157894736, + "loss": 0.6648, + "step": 51620 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003822473684210526, + "loss": 0.6565, + "step": 51630 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003821684210526315, + "loss": 0.672, + "step": 51640 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003820894736842105, + "loss": 0.6618, + "step": 51650 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003820105263157894, + "loss": 0.666, + "step": 51660 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003819315789473684, + "loss": 0.6613, + "step": 51670 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038185263157894736, + "loss": 0.6443, + "step": 51680 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038177368421052625, + "loss": 0.6462, + "step": 51690 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038169473684210526, + "loss": 0.6637, + "step": 51700 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038161578947368415, + "loss": 0.6504, + "step": 51710 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038153684210526315, + "loss": 0.6587, + "step": 51720 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038145789473684205, + "loss": 0.6591, + "step": 51730 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038137894736842105, + "loss": 0.6515, + "step": 51740 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038129999999999994, + "loss": 0.6455, + "step": 51750 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003812210526315789, + "loss": 0.6511, + "step": 51760 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003811421052631579, + "loss": 0.6576, + "step": 51770 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003810631578947368, + "loss": 0.6549, + "step": 51780 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003809842105263158, + "loss": 0.6584, + "step": 51790 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003809052631578947, + "loss": 0.6557, + "step": 51800 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003808263157894737, + "loss": 0.6624, + "step": 51810 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003807473684210526, + "loss": 0.6569, + "step": 51820 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038066842105263153, + "loss": 0.6588, + "step": 51830 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003805894736842105, + "loss": 0.6647, + "step": 51840 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038051052631578943, + "loss": 0.6738, + "step": 51850 + }, + { + "epoch": 0.52, + "learning_rate": 0.00038043157894736843, + "loss": 0.6485, + "step": 51860 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003803526315789473, + "loss": 0.6389, + "step": 51870 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003802736842105263, + "loss": 0.6487, + "step": 51880 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003801947368421052, + "loss": 0.6574, + "step": 51890 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003801157894736842, + "loss": 0.6427, + "step": 51900 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003800368421052631, + "loss": 0.6439, + "step": 51910 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037995789473684207, + "loss": 0.6496, + "step": 51920 + }, + { + "epoch": 0.52, + "learning_rate": 0.000379878947368421, + "loss": 0.6539, + "step": 51930 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037979999999999996, + "loss": 0.6414, + "step": 51940 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037972105263157886, + "loss": 0.6469, + "step": 51950 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037964210526315786, + "loss": 0.6624, + "step": 51960 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037956315789473686, + "loss": 0.6523, + "step": 51970 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037948421052631576, + "loss": 0.6602, + "step": 51980 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003794052631578947, + "loss": 0.6606, + "step": 51990 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037932631578947365, + "loss": 0.6581, + "step": 52000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003792473684210526, + "loss": 0.661, + "step": 52010 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037916842105263155, + "loss": 0.6523, + "step": 52020 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003790894736842105, + "loss": 0.6457, + "step": 52030 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003790105263157894, + "loss": 0.6389, + "step": 52040 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003789315789473684, + "loss": 0.6602, + "step": 52050 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037885263157894734, + "loss": 0.6364, + "step": 52060 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003787736842105263, + "loss": 0.6482, + "step": 52070 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037869473684210524, + "loss": 0.6579, + "step": 52080 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003786157894736842, + "loss": 0.6532, + "step": 52090 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037853684210526313, + "loss": 0.6534, + "step": 52100 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037845789473684203, + "loss": 0.6596, + "step": 52110 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037837894736842103, + "loss": 0.6409, + "step": 52120 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037830789473684205, + "loss": 0.6608, + "step": 52130 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037822894736842105, + "loss": 0.6557, + "step": 52140 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037814999999999995, + "loss": 0.6693, + "step": 52150 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037807105263157895, + "loss": 0.6585, + "step": 52160 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037799210526315785, + "loss": 0.6476, + "step": 52170 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003779131578947368, + "loss": 0.6557, + "step": 52180 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037783421052631574, + "loss": 0.66, + "step": 52190 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003777552631578947, + "loss": 0.653, + "step": 52200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003776763157894737, + "loss": 0.6686, + "step": 52210 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003775973684210526, + "loss": 0.6333, + "step": 52220 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003775184210526316, + "loss": 0.6293, + "step": 52230 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003774394736842105, + "loss": 0.6291, + "step": 52240 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037736052631578943, + "loss": 0.6429, + "step": 52250 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003772815789473684, + "loss": 0.6422, + "step": 52260 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037720263157894733, + "loss": 0.634, + "step": 52270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003771236842105263, + "loss": 0.6351, + "step": 52280 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003770447368421052, + "loss": 0.6347, + "step": 52290 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037696578947368423, + "loss": 0.631, + "step": 52300 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003768868421052631, + "loss": 0.6298, + "step": 52310 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003768078947368421, + "loss": 0.642, + "step": 52320 + }, + { + "epoch": 0.52, + "learning_rate": 0.000376728947368421, + "loss": 0.6384, + "step": 52330 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037664999999999997, + "loss": 0.6417, + "step": 52340 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003765710526315789, + "loss": 0.6363, + "step": 52350 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037649210526315786, + "loss": 0.6414, + "step": 52360 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037641315789473676, + "loss": 0.6366, + "step": 52370 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037633421052631576, + "loss": 0.6137, + "step": 52380 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037625526315789465, + "loss": 0.6338, + "step": 52390 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037617631578947366, + "loss": 0.6376, + "step": 52400 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003760973684210526, + "loss": 0.6323, + "step": 52410 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037601842105263155, + "loss": 0.6304, + "step": 52420 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003759394736842105, + "loss": 0.6433, + "step": 52430 + }, + { + "epoch": 0.52, + "learning_rate": 0.00037586052631578945, + "loss": 0.6372, + "step": 52440 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003757815789473684, + "loss": 0.6331, + "step": 52450 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003757026315789473, + "loss": 0.6432, + "step": 52460 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003756236842105263, + "loss": 0.655, + "step": 52470 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003755447368421052, + "loss": 0.6527, + "step": 52480 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003754657894736842, + "loss": 0.6574, + "step": 52490 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037538684210526314, + "loss": 0.665, + "step": 52500 + }, + { + "epoch": 0.53, + "eval_accuracy": 0.8649771990754928, + "eval_loss": 0.6337890625, + "eval_runtime": 97.2311, + "eval_samples_per_second": 822.782, + "eval_steps_per_second": 1.615, + "step": 52500 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003753078947368421, + "loss": 0.6597, + "step": 52510 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037522894736842104, + "loss": 0.6546, + "step": 52520 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037514999999999993, + "loss": 0.6526, + "step": 52530 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037507105263157893, + "loss": 0.6492, + "step": 52540 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003749921052631578, + "loss": 0.6548, + "step": 52550 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037491315789473683, + "loss": 0.6473, + "step": 52560 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003748342105263157, + "loss": 0.6521, + "step": 52570 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003747552631578947, + "loss": 0.6439, + "step": 52580 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003746763157894737, + "loss": 0.6484, + "step": 52590 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037459736842105257, + "loss": 0.653, + "step": 52600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037451842105263157, + "loss": 0.6508, + "step": 52610 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037443947368421046, + "loss": 0.6658, + "step": 52620 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037436052631578947, + "loss": 0.6619, + "step": 52630 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037428157894736836, + "loss": 0.6419, + "step": 52640 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037420263157894736, + "loss": 0.6504, + "step": 52650 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037412368421052626, + "loss": 0.66, + "step": 52660 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003740447368421052, + "loss": 0.6504, + "step": 52670 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003739657894736842, + "loss": 0.6578, + "step": 52680 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003738868421052631, + "loss": 0.6597, + "step": 52690 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003738078947368421, + "loss": 0.6533, + "step": 52700 + }, + { + "epoch": 0.53, + "learning_rate": 0.000373728947368421, + "loss": 0.662, + "step": 52710 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037365, + "loss": 0.6571, + "step": 52720 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003735710526315789, + "loss": 0.6505, + "step": 52730 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037349210526315784, + "loss": 0.6408, + "step": 52740 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003734131578947368, + "loss": 0.6477, + "step": 52750 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037333421052631574, + "loss": 0.6433, + "step": 52760 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037325526315789474, + "loss": 0.661, + "step": 52770 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037317631578947364, + "loss": 0.6455, + "step": 52780 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037309736842105264, + "loss": 0.6468, + "step": 52790 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037301842105263153, + "loss": 0.6506, + "step": 52800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037293947368421054, + "loss": 0.6513, + "step": 52810 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037286052631578943, + "loss": 0.651, + "step": 52820 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003727815789473684, + "loss": 0.6478, + "step": 52830 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037270263157894733, + "loss": 0.6523, + "step": 52840 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003726236842105263, + "loss": 0.6505, + "step": 52850 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003725447368421053, + "loss": 0.6493, + "step": 52860 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037246578947368417, + "loss": 0.6512, + "step": 52870 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003723868421052632, + "loss": 0.6598, + "step": 52880 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037230789473684207, + "loss": 0.6508, + "step": 52890 + }, + { + "epoch": 0.53, + "learning_rate": 0.000372228947368421, + "loss": 0.6491, + "step": 52900 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037214999999999997, + "loss": 0.6432, + "step": 52910 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003720710526315789, + "loss": 0.6409, + "step": 52920 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037199210526315786, + "loss": 0.6495, + "step": 52930 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003719131578947368, + "loss": 0.6614, + "step": 52940 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003718342105263158, + "loss": 0.6442, + "step": 52950 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003717552631578947, + "loss": 0.6475, + "step": 52960 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037167631578947366, + "loss": 0.6479, + "step": 52970 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003715973684210526, + "loss": 0.6461, + "step": 52980 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037151842105263155, + "loss": 0.6597, + "step": 52990 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003714394736842105, + "loss": 0.6391, + "step": 53000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037136052631578945, + "loss": 0.662, + "step": 53010 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037128157894736834, + "loss": 0.657, + "step": 53020 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037120263157894735, + "loss": 0.6601, + "step": 53030 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037112368421052624, + "loss": 0.6527, + "step": 53040 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037104473684210524, + "loss": 0.6575, + "step": 53050 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003709657894736842, + "loss": 0.6459, + "step": 53060 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037088684210526314, + "loss": 0.6423, + "step": 53070 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003708078947368421, + "loss": 0.6438, + "step": 53080 + }, + { + "epoch": 0.53, + "learning_rate": 0.000370728947368421, + "loss": 0.6432, + "step": 53090 + }, + { + "epoch": 0.53, + "learning_rate": 0.00037065, + "loss": 0.6415, + "step": 53100 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003705710526315789, + "loss": 0.6409, + "step": 53110 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003704921052631579, + "loss": 0.6501, + "step": 53120 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003704131578947368, + "loss": 0.6508, + "step": 53130 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003703342105263158, + "loss": 0.6429, + "step": 53140 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003702552631578947, + "loss": 0.6593, + "step": 53150 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003701763157894736, + "loss": 0.6582, + "step": 53160 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003700973684210526, + "loss": 0.6425, + "step": 53170 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003700184210526315, + "loss": 0.6582, + "step": 53180 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003699394736842105, + "loss": 0.6554, + "step": 53190 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003698605263157894, + "loss": 0.6587, + "step": 53200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036978947368421054, + "loss": 0.6533, + "step": 53210 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036971052631578944, + "loss": 0.656, + "step": 53220 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036963157894736844, + "loss": 0.6619, + "step": 53230 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036955263157894733, + "loss": 0.6465, + "step": 53240 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003694736842105263, + "loss": 0.6526, + "step": 53250 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036939473684210523, + "loss": 0.6582, + "step": 53260 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003693157894736842, + "loss": 0.6478, + "step": 53270 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036923684210526307, + "loss": 0.6473, + "step": 53280 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003691578947368421, + "loss": 0.6559, + "step": 53290 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003690789473684211, + "loss": 0.6522, + "step": 53300 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036899999999999997, + "loss": 0.6699, + "step": 53310 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003689210526315789, + "loss": 0.6507, + "step": 53320 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036884210526315787, + "loss": 0.6577, + "step": 53330 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003687631578947368, + "loss": 0.6501, + "step": 53340 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036868421052631576, + "loss": 0.6583, + "step": 53350 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003686052631578947, + "loss": 0.6705, + "step": 53360 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003685263157894736, + "loss": 0.6437, + "step": 53370 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003684473684210526, + "loss": 0.6594, + "step": 53380 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036836842105263156, + "loss": 0.669, + "step": 53390 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003682894736842105, + "loss": 0.6464, + "step": 53400 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036821052631578945, + "loss": 0.6407, + "step": 53410 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003681315789473684, + "loss": 0.638, + "step": 53420 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036805263157894735, + "loss": 0.6348, + "step": 53430 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036797368421052624, + "loss": 0.6266, + "step": 53440 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036789473684210525, + "loss": 0.6391, + "step": 53450 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036781578947368414, + "loss": 0.6362, + "step": 53460 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036773684210526314, + "loss": 0.6494, + "step": 53470 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036765789473684204, + "loss": 0.6467, + "step": 53480 + }, + { + "epoch": 0.53, + "learning_rate": 0.00036757894736842104, + "loss": 0.6497, + "step": 53490 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003675, + "loss": 0.6531, + "step": 53500 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003674210526315789, + "loss": 0.6574, + "step": 53510 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003673421052631579, + "loss": 0.6492, + "step": 53520 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003672631578947368, + "loss": 0.6602, + "step": 53530 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003671842105263158, + "loss": 0.6606, + "step": 53540 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003671052631578947, + "loss": 0.6547, + "step": 53550 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003670263157894737, + "loss": 0.6544, + "step": 53560 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036694736842105257, + "loss": 0.6526, + "step": 53570 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003668684210526315, + "loss": 0.6398, + "step": 53580 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003667894736842105, + "loss": 0.6504, + "step": 53590 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003667105263157894, + "loss": 0.6406, + "step": 53600 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003666315789473684, + "loss": 0.6389, + "step": 53610 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003665526315789473, + "loss": 0.6466, + "step": 53620 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003664736842105263, + "loss": 0.6452, + "step": 53630 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003663947368421052, + "loss": 0.6412, + "step": 53640 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036631578947368416, + "loss": 0.6413, + "step": 53650 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003662368421052631, + "loss": 0.6475, + "step": 53660 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036615789473684206, + "loss": 0.6469, + "step": 53670 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036607894736842106, + "loss": 0.6475, + "step": 53680 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036599999999999995, + "loss": 0.6378, + "step": 53690 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036592105263157895, + "loss": 0.6466, + "step": 53700 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036584210526315785, + "loss": 0.6557, + "step": 53710 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036576315789473685, + "loss": 0.64, + "step": 53720 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036568421052631574, + "loss": 0.6391, + "step": 53730 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003656052631578947, + "loss": 0.6496, + "step": 53740 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036552631578947364, + "loss": 0.6429, + "step": 53750 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003654473684210526, + "loss": 0.6265, + "step": 53760 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003653684210526316, + "loss": 0.6269, + "step": 53770 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003652894736842105, + "loss": 0.6271, + "step": 53780 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003652105263157895, + "loss": 0.6333, + "step": 53790 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003651315789473684, + "loss": 0.6269, + "step": 53800 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036505263157894733, + "loss": 0.6305, + "step": 53810 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003649736842105263, + "loss": 0.6251, + "step": 53820 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036489473684210523, + "loss": 0.6266, + "step": 53830 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003648157894736842, + "loss": 0.6291, + "step": 53840 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003647368421052631, + "loss": 0.6326, + "step": 53850 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003646578947368421, + "loss": 0.6255, + "step": 53860 + }, + { + "epoch": 0.54, + "learning_rate": 0.000364578947368421, + "loss": 0.6416, + "step": 53870 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036449999999999997, + "loss": 0.6301, + "step": 53880 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003644210526315789, + "loss": 0.6356, + "step": 53890 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036434210526315787, + "loss": 0.6396, + "step": 53900 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003642631578947368, + "loss": 0.6397, + "step": 53910 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036418421052631576, + "loss": 0.624, + "step": 53920 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036410526315789466, + "loss": 0.6294, + "step": 53930 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036402631578947366, + "loss": 0.6254, + "step": 53940 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003639473684210526, + "loss": 0.6404, + "step": 53950 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036386842105263156, + "loss": 0.6266, + "step": 53960 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003637894736842105, + "loss": 0.6206, + "step": 53970 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036371052631578945, + "loss": 0.6368, + "step": 53980 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003636315789473684, + "loss": 0.6368, + "step": 53990 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003635526315789473, + "loss": 0.6188, + "step": 54000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003634736842105263, + "loss": 0.6486, + "step": 54010 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003633947368421052, + "loss": 0.6524, + "step": 54020 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003633157894736842, + "loss": 0.652, + "step": 54030 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003632368421052631, + "loss": 0.6692, + "step": 54040 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003631578947368421, + "loss": 0.6525, + "step": 54050 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036307894736842104, + "loss": 0.66, + "step": 54060 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036299999999999993, + "loss": 0.6666, + "step": 54070 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036292105263157894, + "loss": 0.6658, + "step": 54080 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036284210526315783, + "loss": 0.6607, + "step": 54090 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036276315789473683, + "loss": 0.644, + "step": 54100 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003626842105263157, + "loss": 0.6426, + "step": 54110 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036260526315789473, + "loss": 0.6392, + "step": 54120 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003625263157894736, + "loss": 0.6323, + "step": 54130 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036244736842105257, + "loss": 0.6458, + "step": 54140 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003623684210526316, + "loss": 0.6532, + "step": 54150 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036228947368421047, + "loss": 0.6577, + "step": 54160 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036221052631578947, + "loss": 0.643, + "step": 54170 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036213157894736836, + "loss": 0.6336, + "step": 54180 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036205263157894737, + "loss": 0.6455, + "step": 54190 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036197368421052626, + "loss": 0.6565, + "step": 54200 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036189473684210526, + "loss": 0.6515, + "step": 54210 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036181578947368416, + "loss": 0.6487, + "step": 54220 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003617368421052631, + "loss": 0.6596, + "step": 54230 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003616578947368421, + "loss": 0.6494, + "step": 54240 + }, + { + "epoch": 0.54, + "learning_rate": 0.000361578947368421, + "loss": 0.6481, + "step": 54250 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003615, + "loss": 0.6374, + "step": 54260 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003614210526315789, + "loss": 0.6385, + "step": 54270 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003613421052631579, + "loss": 0.6185, + "step": 54280 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003612631578947368, + "loss": 0.6492, + "step": 54290 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036118421052631574, + "loss": 0.6324, + "step": 54300 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003611052631578947, + "loss": 0.6396, + "step": 54310 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036102631578947364, + "loss": 0.6416, + "step": 54320 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036094736842105264, + "loss": 0.6386, + "step": 54330 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036086842105263154, + "loss": 0.6363, + "step": 54340 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036078947368421054, + "loss": 0.6421, + "step": 54350 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036071052631578943, + "loss": 0.6554, + "step": 54360 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003606315789473684, + "loss": 0.6553, + "step": 54370 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036055263157894733, + "loss": 0.6402, + "step": 54380 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003604736842105263, + "loss": 0.6519, + "step": 54390 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003603947368421052, + "loss": 0.657, + "step": 54400 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003603157894736842, + "loss": 0.6611, + "step": 54410 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003602368421052632, + "loss": 0.6669, + "step": 54420 + }, + { + "epoch": 0.54, + "learning_rate": 0.00036015789473684207, + "loss": 0.6622, + "step": 54430 + }, + { + "epoch": 0.54, + "learning_rate": 0.000360078947368421, + "loss": 0.6494, + "step": 54440 + }, + { + "epoch": 0.54, + "learning_rate": 0.00035999999999999997, + "loss": 0.6571, + "step": 54450 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003599210526315789, + "loss": 0.6397, + "step": 54460 + }, + { + "epoch": 0.54, + "learning_rate": 0.00035984210526315787, + "loss": 0.6506, + "step": 54470 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003597631578947368, + "loss": 0.6398, + "step": 54480 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003596842105263157, + "loss": 0.6269, + "step": 54490 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003596052631578947, + "loss": 0.6423, + "step": 54500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035952631578947366, + "loss": 0.6452, + "step": 54510 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035945526315789473, + "loss": 0.644, + "step": 54520 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035937631578947363, + "loss": 0.6432, + "step": 54530 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035929736842105263, + "loss": 0.6419, + "step": 54540 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003592184210526315, + "loss": 0.6584, + "step": 54550 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035913947368421047, + "loss": 0.649, + "step": 54560 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003590605263157894, + "loss": 0.6497, + "step": 54570 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035898157894736837, + "loss": 0.6494, + "step": 54580 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035890263157894737, + "loss": 0.6523, + "step": 54590 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035882368421052627, + "loss": 0.6405, + "step": 54600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035874473684210527, + "loss": 0.6463, + "step": 54610 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035866578947368416, + "loss": 0.6429, + "step": 54620 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035858684210526316, + "loss": 0.6452, + "step": 54630 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035850789473684206, + "loss": 0.639, + "step": 54640 + }, + { + "epoch": 0.55, + "learning_rate": 0.000358428947368421, + "loss": 0.6324, + "step": 54650 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035834999999999996, + "loss": 0.6429, + "step": 54660 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003582710526315789, + "loss": 0.6392, + "step": 54670 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003581921052631579, + "loss": 0.6248, + "step": 54680 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003581131578947368, + "loss": 0.6205, + "step": 54690 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003580342105263158, + "loss": 0.6234, + "step": 54700 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003579552631578947, + "loss": 0.6364, + "step": 54710 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035787631578947365, + "loss": 0.6511, + "step": 54720 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003577973684210526, + "loss": 0.6481, + "step": 54730 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035771842105263154, + "loss": 0.6538, + "step": 54740 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003576394736842105, + "loss": 0.6412, + "step": 54750 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035756052631578944, + "loss": 0.6432, + "step": 54760 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035748157894736844, + "loss": 0.656, + "step": 54770 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035740263157894734, + "loss": 0.6436, + "step": 54780 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003573236842105263, + "loss": 0.6444, + "step": 54790 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035724473684210523, + "loss": 0.636, + "step": 54800 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003571657894736842, + "loss": 0.6347, + "step": 54810 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035708684210526313, + "loss": 0.6237, + "step": 54820 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003570078947368421, + "loss": 0.6384, + "step": 54830 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035692894736842097, + "loss": 0.6369, + "step": 54840 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035685, + "loss": 0.649, + "step": 54850 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003567710526315789, + "loss": 0.6448, + "step": 54860 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035669210526315787, + "loss": 0.6391, + "step": 54870 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003566131578947368, + "loss": 0.6459, + "step": 54880 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035653421052631577, + "loss": 0.6546, + "step": 54890 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003564552631578947, + "loss": 0.6441, + "step": 54900 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003563763157894736, + "loss": 0.6526, + "step": 54910 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003562973684210526, + "loss": 0.6456, + "step": 54920 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003562184210526315, + "loss": 0.6433, + "step": 54930 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003561394736842105, + "loss": 0.6512, + "step": 54940 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035606052631578946, + "loss": 0.6423, + "step": 54950 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003559815789473684, + "loss": 0.6436, + "step": 54960 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035590263157894735, + "loss": 0.6341, + "step": 54970 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035582368421052625, + "loss": 0.6416, + "step": 54980 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035574473684210525, + "loss": 0.6439, + "step": 54990 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035566578947368414, + "loss": 0.625, + "step": 55000 + }, + { + "epoch": 0.55, + "eval_accuracy": 0.8664423590057567, + "eval_loss": 0.62548828125, + "eval_runtime": 97.6676, + "eval_samples_per_second": 819.104, + "eval_steps_per_second": 1.607, + "step": 55000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035558684210526315, + "loss": 0.644, + "step": 55010 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035550789473684204, + "loss": 0.6439, + "step": 55020 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035542894736842104, + "loss": 0.646, + "step": 55030 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035535, + "loss": 0.6523, + "step": 55040 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003552710526315789, + "loss": 0.6389, + "step": 55050 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003551921052631579, + "loss": 0.6624, + "step": 55060 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003551131578947368, + "loss": 0.6395, + "step": 55070 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003550342105263158, + "loss": 0.644, + "step": 55080 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003549552631578947, + "loss": 0.6422, + "step": 55090 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003548763157894737, + "loss": 0.6481, + "step": 55100 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003547973684210526, + "loss": 0.6472, + "step": 55110 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003547184210526316, + "loss": 0.649, + "step": 55120 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035463947368421047, + "loss": 0.6428, + "step": 55130 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003545605263157894, + "loss": 0.6383, + "step": 55140 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003544815789473684, + "loss": 0.6402, + "step": 55150 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003544026315789473, + "loss": 0.6516, + "step": 55160 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003543236842105263, + "loss": 0.6429, + "step": 55170 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003542447368421052, + "loss": 0.6501, + "step": 55180 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003541657894736842, + "loss": 0.647, + "step": 55190 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003540868421052631, + "loss": 0.659, + "step": 55200 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035400789473684206, + "loss": 0.6417, + "step": 55210 + }, + { + "epoch": 0.55, + "learning_rate": 0.000353928947368421, + "loss": 0.6509, + "step": 55220 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035384999999999995, + "loss": 0.6471, + "step": 55230 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035377105263157896, + "loss": 0.6479, + "step": 55240 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035369210526315785, + "loss": 0.6385, + "step": 55250 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035361315789473685, + "loss": 0.6525, + "step": 55260 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035353421052631575, + "loss": 0.6489, + "step": 55270 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003534552631578947, + "loss": 0.6523, + "step": 55280 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035337631578947364, + "loss": 0.657, + "step": 55290 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003532973684210526, + "loss": 0.6315, + "step": 55300 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035321842105263154, + "loss": 0.6426, + "step": 55310 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003531394736842105, + "loss": 0.6449, + "step": 55320 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003530605263157895, + "loss": 0.6538, + "step": 55330 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003529815789473684, + "loss": 0.6431, + "step": 55340 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035290263157894733, + "loss": 0.6439, + "step": 55350 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003528236842105263, + "loss": 0.6624, + "step": 55360 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035274473684210523, + "loss": 0.6426, + "step": 55370 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003526657894736842, + "loss": 0.6441, + "step": 55380 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035258684210526313, + "loss": 0.6522, + "step": 55390 + }, + { + "epoch": 0.55, + "learning_rate": 0.000352507894736842, + "loss": 0.6543, + "step": 55400 + }, + { + "epoch": 0.55, + "learning_rate": 0.000352428947368421, + "loss": 0.6452, + "step": 55410 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035234999999999997, + "loss": 0.6691, + "step": 55420 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003522710526315789, + "loss": 0.6552, + "step": 55430 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035219210526315787, + "loss": 0.6545, + "step": 55440 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003521131578947368, + "loss": 0.6435, + "step": 55450 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035203421052631577, + "loss": 0.6458, + "step": 55460 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035195526315789466, + "loss": 0.6472, + "step": 55470 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035187631578947366, + "loss": 0.6469, + "step": 55480 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035179736842105256, + "loss": 0.6351, + "step": 55490 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035171842105263156, + "loss": 0.6439, + "step": 55500 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003516394736842105, + "loss": 0.6513, + "step": 55510 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035156052631578946, + "loss": 0.6383, + "step": 55520 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003514815789473684, + "loss": 0.6356, + "step": 55530 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003514026315789473, + "loss": 0.6444, + "step": 55540 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003513236842105263, + "loss": 0.6421, + "step": 55550 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003512447368421052, + "loss": 0.6535, + "step": 55560 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003511657894736842, + "loss": 0.6477, + "step": 55570 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003510868421052631, + "loss": 0.6432, + "step": 55580 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003510078947368421, + "loss": 0.6337, + "step": 55590 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035092894736842104, + "loss": 0.6365, + "step": 55600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035085, + "loss": 0.6457, + "step": 55610 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035077105263157894, + "loss": 0.6544, + "step": 55620 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035069210526315783, + "loss": 0.6516, + "step": 55630 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035061315789473683, + "loss": 0.6408, + "step": 55640 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035053421052631573, + "loss": 0.6473, + "step": 55650 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035045526315789473, + "loss": 0.6427, + "step": 55660 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003503763157894736, + "loss": 0.6376, + "step": 55670 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035029736842105263, + "loss": 0.6378, + "step": 55680 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003502184210526316, + "loss": 0.6464, + "step": 55690 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035013947368421047, + "loss": 0.6271, + "step": 55700 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003500605263157895, + "loss": 0.6344, + "step": 55710 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034998157894736837, + "loss": 0.648, + "step": 55720 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034990263157894737, + "loss": 0.643, + "step": 55730 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034982368421052626, + "loss": 0.631, + "step": 55740 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034974473684210527, + "loss": 0.6502, + "step": 55750 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034966578947368416, + "loss": 0.6507, + "step": 55760 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003495868421052631, + "loss": 0.6426, + "step": 55770 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034950789473684206, + "loss": 0.6424, + "step": 55780 + }, + { + "epoch": 0.56, + "learning_rate": 0.000349428947368421, + "loss": 0.6392, + "step": 55790 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034935, + "loss": 0.6421, + "step": 55800 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003492710526315789, + "loss": 0.6324, + "step": 55810 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003491921052631579, + "loss": 0.6444, + "step": 55820 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003491131578947368, + "loss": 0.6447, + "step": 55830 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034903421052631575, + "loss": 0.6435, + "step": 55840 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003489552631578947, + "loss": 0.644, + "step": 55850 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034887631578947364, + "loss": 0.646, + "step": 55860 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003487973684210526, + "loss": 0.6475, + "step": 55870 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034871842105263154, + "loss": 0.6389, + "step": 55880 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034863947368421054, + "loss": 0.6506, + "step": 55890 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034856052631578944, + "loss": 0.6348, + "step": 55900 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003484815789473684, + "loss": 0.6432, + "step": 55910 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034840263157894733, + "loss": 0.6391, + "step": 55920 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003483236842105263, + "loss": 0.6329, + "step": 55930 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034824473684210523, + "loss": 0.6301, + "step": 55940 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003481657894736842, + "loss": 0.6469, + "step": 55950 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034808684210526307, + "loss": 0.6357, + "step": 55960 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003480078947368421, + "loss": 0.6221, + "step": 55970 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003479289473684211, + "loss": 0.6339, + "step": 55980 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034784999999999997, + "loss": 0.6337, + "step": 55990 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003477710526315789, + "loss": 0.6332, + "step": 56000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034769210526315787, + "loss": 0.6302, + "step": 56010 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003476131578947368, + "loss": 0.6371, + "step": 56020 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003475342105263157, + "loss": 0.6307, + "step": 56030 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003474552631578947, + "loss": 0.639, + "step": 56040 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003473763157894736, + "loss": 0.6372, + "step": 56050 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003472973684210526, + "loss": 0.6372, + "step": 56060 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034721842105263156, + "loss": 0.63, + "step": 56070 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003471394736842105, + "loss": 0.632, + "step": 56080 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034706052631578945, + "loss": 0.6431, + "step": 56090 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003469815789473684, + "loss": 0.6313, + "step": 56100 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034690263157894735, + "loss": 0.6261, + "step": 56110 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034682368421052625, + "loss": 0.6483, + "step": 56120 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034674473684210525, + "loss": 0.6419, + "step": 56130 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034666578947368414, + "loss": 0.6399, + "step": 56140 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034658684210526314, + "loss": 0.6251, + "step": 56150 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003465078947368421, + "loss": 0.6423, + "step": 56160 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034642894736842104, + "loss": 0.6186, + "step": 56170 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034635, + "loss": 0.641, + "step": 56180 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003462710526315789, + "loss": 0.6452, + "step": 56190 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003461921052631579, + "loss": 0.6445, + "step": 56200 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003461131578947368, + "loss": 0.6446, + "step": 56210 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003460342105263158, + "loss": 0.629, + "step": 56220 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003459552631578947, + "loss": 0.644, + "step": 56230 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003458763157894737, + "loss": 0.6362, + "step": 56240 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034579736842105263, + "loss": 0.6241, + "step": 56250 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003457184210526315, + "loss": 0.6325, + "step": 56260 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003456394736842105, + "loss": 0.6376, + "step": 56270 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003455605263157894, + "loss": 0.6394, + "step": 56280 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003454815789473684, + "loss": 0.6292, + "step": 56290 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003454026315789473, + "loss": 0.6353, + "step": 56300 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003453236842105263, + "loss": 0.6216, + "step": 56310 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003452447368421052, + "loss": 0.6399, + "step": 56320 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034516578947368416, + "loss": 0.6352, + "step": 56330 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003450868421052631, + "loss": 0.6526, + "step": 56340 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003450157894736842, + "loss": 0.6381, + "step": 56350 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034493684210526313, + "loss": 0.6281, + "step": 56360 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003448578947368421, + "loss": 0.6319, + "step": 56370 + }, + { + "epoch": 0.56, + "learning_rate": 0.000344778947368421, + "loss": 0.6304, + "step": 56380 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003447, + "loss": 0.6291, + "step": 56390 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034462105263157887, + "loss": 0.6208, + "step": 56400 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034454210526315787, + "loss": 0.6203, + "step": 56410 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003444631578947368, + "loss": 0.6251, + "step": 56420 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034438421052631577, + "loss": 0.6147, + "step": 56430 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003443052631578947, + "loss": 0.6122, + "step": 56440 + }, + { + "epoch": 0.56, + "learning_rate": 0.00034422631578947367, + "loss": 0.6184, + "step": 56450 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003441473684210526, + "loss": 0.6262, + "step": 56460 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003440684210526315, + "loss": 0.6301, + "step": 56470 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003439894736842105, + "loss": 0.61, + "step": 56480 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003439105263157894, + "loss": 0.6155, + "step": 56490 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003438315789473684, + "loss": 0.6194, + "step": 56500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034375263157894736, + "loss": 0.6331, + "step": 56510 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003436736842105263, + "loss": 0.6275, + "step": 56520 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034359473684210525, + "loss": 0.6382, + "step": 56530 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034351578947368415, + "loss": 0.61, + "step": 56540 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034343684210526315, + "loss": 0.6213, + "step": 56550 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034335789473684204, + "loss": 0.6242, + "step": 56560 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034327894736842105, + "loss": 0.6303, + "step": 56570 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034319999999999994, + "loss": 0.6167, + "step": 56580 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034312105263157894, + "loss": 0.6226, + "step": 56590 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003430421052631579, + "loss": 0.6151, + "step": 56600 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003429631578947368, + "loss": 0.6189, + "step": 56610 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003428842105263158, + "loss": 0.6126, + "step": 56620 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003428052631578947, + "loss": 0.6307, + "step": 56630 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003427263157894737, + "loss": 0.6203, + "step": 56640 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003426473684210526, + "loss": 0.6186, + "step": 56650 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003425684210526316, + "loss": 0.6192, + "step": 56660 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003424894736842105, + "loss": 0.6345, + "step": 56670 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003424105263157894, + "loss": 0.6189, + "step": 56680 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003423315789473684, + "loss": 0.6174, + "step": 56690 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003422526315789473, + "loss": 0.6186, + "step": 56700 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003421736842105263, + "loss": 0.6288, + "step": 56710 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003420947368421052, + "loss": 0.61, + "step": 56720 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003420157894736842, + "loss": 0.6311, + "step": 56730 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003419368421052631, + "loss": 0.6098, + "step": 56740 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034185789473684206, + "loss": 0.6229, + "step": 56750 + }, + { + "epoch": 0.57, + "learning_rate": 0.000341778947368421, + "loss": 0.6144, + "step": 56760 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034169999999999996, + "loss": 0.6202, + "step": 56770 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003416210526315789, + "loss": 0.6269, + "step": 56780 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034154210526315785, + "loss": 0.6207, + "step": 56790 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034146315789473686, + "loss": 0.6011, + "step": 56800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034138421052631575, + "loss": 0.6114, + "step": 56810 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034130526315789475, + "loss": 0.6172, + "step": 56820 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034122631578947365, + "loss": 0.619, + "step": 56830 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003411473684210526, + "loss": 0.6259, + "step": 56840 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034106842105263154, + "loss": 0.6353, + "step": 56850 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003409894736842105, + "loss": 0.6145, + "step": 56860 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003409105263157894, + "loss": 0.6306, + "step": 56870 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003408315789473684, + "loss": 0.6264, + "step": 56880 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003407526315789474, + "loss": 0.636, + "step": 56890 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003406736842105263, + "loss": 0.6438, + "step": 56900 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034059473684210523, + "loss": 0.6417, + "step": 56910 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003405157894736842, + "loss": 0.6303, + "step": 56920 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034043684210526313, + "loss": 0.6296, + "step": 56930 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003403578947368421, + "loss": 0.6394, + "step": 56940 + }, + { + "epoch": 0.57, + "learning_rate": 0.000340278947368421, + "loss": 0.6436, + "step": 56950 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003401999999999999, + "loss": 0.6471, + "step": 56960 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003401210526315789, + "loss": 0.645, + "step": 56970 + }, + { + "epoch": 0.57, + "learning_rate": 0.00034004210526315787, + "loss": 0.6309, + "step": 56980 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003399631578947368, + "loss": 0.6183, + "step": 56990 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033988421052631577, + "loss": 0.6312, + "step": 57000 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003398052631578947, + "loss": 0.6371, + "step": 57010 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033972631578947366, + "loss": 0.6384, + "step": 57020 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033964736842105256, + "loss": 0.6484, + "step": 57030 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033956842105263156, + "loss": 0.6298, + "step": 57040 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033948947368421046, + "loss": 0.6307, + "step": 57050 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033941052631578946, + "loss": 0.6307, + "step": 57060 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003393315789473684, + "loss": 0.6244, + "step": 57070 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033925263157894735, + "loss": 0.631, + "step": 57080 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003391736842105263, + "loss": 0.638, + "step": 57090 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003390947368421052, + "loss": 0.6357, + "step": 57100 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003390157894736842, + "loss": 0.6357, + "step": 57110 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003389368421052631, + "loss": 0.6268, + "step": 57120 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003388578947368421, + "loss": 0.6304, + "step": 57130 + }, + { + "epoch": 0.57, + "learning_rate": 0.000338778947368421, + "loss": 0.6377, + "step": 57140 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003387, + "loss": 0.6362, + "step": 57150 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033862105263157894, + "loss": 0.6273, + "step": 57160 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033854210526315784, + "loss": 0.6422, + "step": 57170 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033846315789473684, + "loss": 0.6449, + "step": 57180 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033838421052631573, + "loss": 0.6322, + "step": 57190 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033830526315789473, + "loss": 0.6326, + "step": 57200 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033822631578947363, + "loss": 0.6288, + "step": 57210 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033814736842105263, + "loss": 0.6461, + "step": 57220 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003380684210526315, + "loss": 0.637, + "step": 57230 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003379894736842105, + "loss": 0.637, + "step": 57240 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003379105263157895, + "loss": 0.6544, + "step": 57250 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033783157894736837, + "loss": 0.6205, + "step": 57260 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033775263157894737, + "loss": 0.6239, + "step": 57270 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033767368421052627, + "loss": 0.6405, + "step": 57280 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033759473684210527, + "loss": 0.6491, + "step": 57290 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033751578947368416, + "loss": 0.636, + "step": 57300 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033743684210526317, + "loss": 0.633, + "step": 57310 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033735789473684206, + "loss": 0.6305, + "step": 57320 + }, + { + "epoch": 0.57, + "learning_rate": 0.000337278947368421, + "loss": 0.634, + "step": 57330 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003372, + "loss": 0.6291, + "step": 57340 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003371210526315789, + "loss": 0.6426, + "step": 57350 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003370421052631579, + "loss": 0.6482, + "step": 57360 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003369631578947368, + "loss": 0.6401, + "step": 57370 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003368842105263158, + "loss": 0.6438, + "step": 57380 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003368052631578947, + "loss": 0.6434, + "step": 57390 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033672631578947365, + "loss": 0.6377, + "step": 57400 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003366473684210526, + "loss": 0.6433, + "step": 57410 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033656842105263154, + "loss": 0.6356, + "step": 57420 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003364894736842105, + "loss": 0.6279, + "step": 57430 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033641052631578944, + "loss": 0.634, + "step": 57440 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033633157894736844, + "loss": 0.6353, + "step": 57450 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033625263157894734, + "loss": 0.6357, + "step": 57460 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003361736842105263, + "loss": 0.6419, + "step": 57470 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033609473684210523, + "loss": 0.6388, + "step": 57480 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003360157894736842, + "loss": 0.6252, + "step": 57490 + }, + { + "epoch": 0.57, + "learning_rate": 0.00033593684210526313, + "loss": 0.6428, + "step": 57500 + }, + { + "epoch": 0.57, + "eval_accuracy": 0.8676777163075269, + "eval_loss": 0.61767578125, + "eval_runtime": 96.8045, + "eval_samples_per_second": 826.408, + "eval_steps_per_second": 1.622, + "step": 57500 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003358578947368421, + "loss": 0.6304, + "step": 57510 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033577894736842097, + "loss": 0.6299, + "step": 57520 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003357, + "loss": 0.6365, + "step": 57530 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003356210526315789, + "loss": 0.6266, + "step": 57540 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033554210526315787, + "loss": 0.6205, + "step": 57550 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003354631578947368, + "loss": 0.6284, + "step": 57560 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033538421052631577, + "loss": 0.6286, + "step": 57570 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003353052631578947, + "loss": 0.6389, + "step": 57580 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003352263157894736, + "loss": 0.6443, + "step": 57590 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003351473684210526, + "loss": 0.6421, + "step": 57600 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003350684210526315, + "loss": 0.6325, + "step": 57610 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003349894736842105, + "loss": 0.6377, + "step": 57620 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033491052631578946, + "loss": 0.6283, + "step": 57630 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003348315789473684, + "loss": 0.6312, + "step": 57640 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033475263157894735, + "loss": 0.6421, + "step": 57650 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033467368421052625, + "loss": 0.6334, + "step": 57660 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033459473684210525, + "loss": 0.6284, + "step": 57670 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033451578947368414, + "loss": 0.6297, + "step": 57680 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033443684210526315, + "loss": 0.6359, + "step": 57690 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033435789473684204, + "loss": 0.6484, + "step": 57700 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033427894736842104, + "loss": 0.6333, + "step": 57710 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003342, + "loss": 0.6478, + "step": 57720 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003341210526315789, + "loss": 0.6257, + "step": 57730 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003340421052631579, + "loss": 0.6177, + "step": 57740 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003339631578947368, + "loss": 0.6136, + "step": 57750 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003338842105263158, + "loss": 0.6088, + "step": 57760 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003338052631578947, + "loss": 0.6189, + "step": 57770 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003337263157894737, + "loss": 0.6258, + "step": 57780 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003336473684210526, + "loss": 0.623, + "step": 57790 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003335684210526316, + "loss": 0.6204, + "step": 57800 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003334894736842105, + "loss": 0.6175, + "step": 57810 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003334105263157894, + "loss": 0.6306, + "step": 57820 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003333315789473684, + "loss": 0.641, + "step": 57830 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003332526315789473, + "loss": 0.6432, + "step": 57840 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003331736842105263, + "loss": 0.6333, + "step": 57850 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003330947368421052, + "loss": 0.6337, + "step": 57860 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003330157894736842, + "loss": 0.6331, + "step": 57870 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003329368421052631, + "loss": 0.6361, + "step": 57880 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033285789473684206, + "loss": 0.6286, + "step": 57890 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033277894736842106, + "loss": 0.6331, + "step": 57900 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033269999999999996, + "loss": 0.6294, + "step": 57910 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033262105263157896, + "loss": 0.6184, + "step": 57920 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033254210526315785, + "loss": 0.6229, + "step": 57930 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033246315789473685, + "loss": 0.6278, + "step": 57940 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033238421052631575, + "loss": 0.6325, + "step": 57950 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003323052631578947, + "loss": 0.6175, + "step": 57960 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033222631578947365, + "loss": 0.6318, + "step": 57970 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003321473684210526, + "loss": 0.6412, + "step": 57980 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033206842105263154, + "loss": 0.6204, + "step": 57990 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003319894736842105, + "loss": 0.6178, + "step": 58000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003319105263157895, + "loss": 0.631, + "step": 58010 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003318315789473684, + "loss": 0.6335, + "step": 58020 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033175263157894734, + "loss": 0.629, + "step": 58030 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003316736842105263, + "loss": 0.6225, + "step": 58040 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033159473684210523, + "loss": 0.6384, + "step": 58050 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003315157894736842, + "loss": 0.6271, + "step": 58060 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033143684210526313, + "loss": 0.6272, + "step": 58070 + }, + { + "epoch": 0.58, + "learning_rate": 0.000331357894736842, + "loss": 0.6344, + "step": 58080 + }, + { + "epoch": 0.58, + "learning_rate": 0.000331278947368421, + "loss": 0.6287, + "step": 58090 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003312, + "loss": 0.6213, + "step": 58100 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003311210526315789, + "loss": 0.6273, + "step": 58110 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033104210526315787, + "loss": 0.63, + "step": 58120 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003309631578947368, + "loss": 0.6309, + "step": 58130 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033088421052631577, + "loss": 0.6349, + "step": 58140 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033080526315789466, + "loss": 0.6352, + "step": 58150 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033072631578947366, + "loss": 0.6297, + "step": 58160 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033064736842105256, + "loss": 0.6297, + "step": 58170 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033056842105263156, + "loss": 0.6383, + "step": 58180 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003304894736842105, + "loss": 0.6287, + "step": 58190 + }, + { + "epoch": 0.58, + "learning_rate": 0.00033041052631578946, + "loss": 0.6298, + "step": 58200 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003303315789473684, + "loss": 0.6346, + "step": 58210 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003302526315789473, + "loss": 0.6338, + "step": 58220 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003301736842105263, + "loss": 0.6466, + "step": 58230 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003300947368421052, + "loss": 0.6371, + "step": 58240 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003300157894736842, + "loss": 0.6309, + "step": 58250 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003299368421052631, + "loss": 0.6383, + "step": 58260 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003298578947368421, + "loss": 0.6353, + "step": 58270 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032977894736842104, + "loss": 0.6262, + "step": 58280 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003297, + "loss": 0.6326, + "step": 58290 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032962105263157894, + "loss": 0.6334, + "step": 58300 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032954210526315783, + "loss": 0.6404, + "step": 58310 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032946315789473684, + "loss": 0.6286, + "step": 58320 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032938421052631573, + "loss": 0.6268, + "step": 58330 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032930526315789473, + "loss": 0.6331, + "step": 58340 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032923421052631575, + "loss": 0.6318, + "step": 58350 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032915526315789476, + "loss": 0.6397, + "step": 58360 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032907631578947365, + "loss": 0.6216, + "step": 58370 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003289973684210526, + "loss": 0.6413, + "step": 58380 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032891842105263155, + "loss": 0.6298, + "step": 58390 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003288394736842105, + "loss": 0.6345, + "step": 58400 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032876052631578944, + "loss": 0.6322, + "step": 58410 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003286815789473684, + "loss": 0.638, + "step": 58420 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003286026315789474, + "loss": 0.6288, + "step": 58430 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003285236842105263, + "loss": 0.6297, + "step": 58440 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032844473684210524, + "loss": 0.6264, + "step": 58450 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003283657894736842, + "loss": 0.6328, + "step": 58460 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032828684210526313, + "loss": 0.6424, + "step": 58470 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003282078947368421, + "loss": 0.6358, + "step": 58480 + }, + { + "epoch": 0.58, + "learning_rate": 0.00032812894736842103, + "loss": 0.6158, + "step": 58490 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003280499999999999, + "loss": 0.6232, + "step": 58500 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003279710526315789, + "loss": 0.6338, + "step": 58510 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003278921052631578, + "loss": 0.6393, + "step": 58520 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003278131578947368, + "loss": 0.6383, + "step": 58530 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032773421052631577, + "loss": 0.6386, + "step": 58540 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003276552631578947, + "loss": 0.6394, + "step": 58550 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032757631578947367, + "loss": 0.6503, + "step": 58560 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032749736842105256, + "loss": 0.6437, + "step": 58570 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032741842105263156, + "loss": 0.6389, + "step": 58580 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003273473684210526, + "loss": 0.6397, + "step": 58590 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003272684210526316, + "loss": 0.6371, + "step": 58600 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003271894736842105, + "loss": 0.6372, + "step": 58610 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003271105263157895, + "loss": 0.6464, + "step": 58620 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003270315789473684, + "loss": 0.6482, + "step": 58630 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003269526315789474, + "loss": 0.6394, + "step": 58640 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003268736842105263, + "loss": 0.6346, + "step": 58650 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003267947368421052, + "loss": 0.6476, + "step": 58660 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032671578947368417, + "loss": 0.6379, + "step": 58670 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003266368421052631, + "loss": 0.6384, + "step": 58680 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003265578947368421, + "loss": 0.6229, + "step": 58690 + }, + { + "epoch": 0.59, + "learning_rate": 0.000326478947368421, + "loss": 0.6252, + "step": 58700 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003264, + "loss": 0.6342, + "step": 58710 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003263210526315789, + "loss": 0.6275, + "step": 58720 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032624210526315786, + "loss": 0.6402, + "step": 58730 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003261631578947368, + "loss": 0.6292, + "step": 58740 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032608421052631576, + "loss": 0.6141, + "step": 58750 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003260052631578947, + "loss": 0.6227, + "step": 58760 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032592631578947365, + "loss": 0.636, + "step": 58770 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032584736842105266, + "loss": 0.6358, + "step": 58780 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032576842105263155, + "loss": 0.6214, + "step": 58790 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003256894736842105, + "loss": 0.6314, + "step": 58800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032561052631578945, + "loss": 0.6404, + "step": 58810 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003255315789473684, + "loss": 0.6314, + "step": 58820 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032545263157894734, + "loss": 0.6287, + "step": 58830 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003253736842105263, + "loss": 0.635, + "step": 58840 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003252947368421052, + "loss": 0.632, + "step": 58850 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003252157894736842, + "loss": 0.6224, + "step": 58860 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032513684210526314, + "loss": 0.6334, + "step": 58870 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003250578947368421, + "loss": 0.6179, + "step": 58880 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032497894736842103, + "loss": 0.6306, + "step": 58890 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003249, + "loss": 0.6364, + "step": 58900 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032482105263157893, + "loss": 0.6397, + "step": 58910 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003247421052631578, + "loss": 0.6311, + "step": 58920 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032466315789473683, + "loss": 0.6396, + "step": 58930 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003245842105263157, + "loss": 0.6295, + "step": 58940 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003245052631578947, + "loss": 0.6268, + "step": 58950 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003244263157894736, + "loss": 0.6296, + "step": 58960 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003243473684210526, + "loss": 0.6345, + "step": 58970 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032426842105263157, + "loss": 0.6227, + "step": 58980 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032418947368421046, + "loss": 0.6243, + "step": 58990 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032411052631578947, + "loss": 0.6189, + "step": 59000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032403157894736836, + "loss": 0.6153, + "step": 59010 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032395263157894736, + "loss": 0.6217, + "step": 59020 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032387368421052626, + "loss": 0.6358, + "step": 59030 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032379473684210526, + "loss": 0.6353, + "step": 59040 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032371578947368415, + "loss": 0.6306, + "step": 59050 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003236368421052631, + "loss": 0.627, + "step": 59060 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003235578947368421, + "loss": 0.6333, + "step": 59070 + }, + { + "epoch": 0.59, + "learning_rate": 0.000323478947368421, + "loss": 0.6257, + "step": 59080 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003234, + "loss": 0.6248, + "step": 59090 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003233210526315789, + "loss": 0.6364, + "step": 59100 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003232421052631579, + "loss": 0.6307, + "step": 59110 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003231631578947368, + "loss": 0.622, + "step": 59120 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003230842105263158, + "loss": 0.6147, + "step": 59130 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003230052631578947, + "loss": 0.6204, + "step": 59140 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032292631578947364, + "loss": 0.6294, + "step": 59150 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032284736842105264, + "loss": 0.6309, + "step": 59160 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032276842105263153, + "loss": 0.6282, + "step": 59170 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032268947368421054, + "loss": 0.6147, + "step": 59180 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032261052631578943, + "loss": 0.6206, + "step": 59190 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032253157894736843, + "loss": 0.6234, + "step": 59200 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003224526315789473, + "loss": 0.6262, + "step": 59210 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003223736842105263, + "loss": 0.6348, + "step": 59220 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003222947368421052, + "loss": 0.6257, + "step": 59230 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032221578947368417, + "loss": 0.6284, + "step": 59240 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003221368421052632, + "loss": 0.6237, + "step": 59250 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032205789473684207, + "loss": 0.6259, + "step": 59260 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032197894736842107, + "loss": 0.6308, + "step": 59270 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032189999999999996, + "loss": 0.6322, + "step": 59280 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003218210526315789, + "loss": 0.6329, + "step": 59290 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032174210526315786, + "loss": 0.6196, + "step": 59300 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003216631578947368, + "loss": 0.619, + "step": 59310 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032158421052631576, + "loss": 0.6221, + "step": 59320 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003215052631578947, + "loss": 0.6213, + "step": 59330 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003214263157894737, + "loss": 0.6261, + "step": 59340 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003213473684210526, + "loss": 0.6266, + "step": 59350 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032126842105263155, + "loss": 0.6212, + "step": 59360 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003211894736842105, + "loss": 0.6213, + "step": 59370 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032111052631578945, + "loss": 0.6246, + "step": 59380 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003210315789473684, + "loss": 0.6276, + "step": 59390 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032095263157894734, + "loss": 0.6341, + "step": 59400 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032087368421052624, + "loss": 0.6288, + "step": 59410 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032079473684210524, + "loss": 0.6212, + "step": 59420 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003207157894736842, + "loss": 0.633, + "step": 59430 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032063684210526314, + "loss": 0.6285, + "step": 59440 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003205578947368421, + "loss": 0.6212, + "step": 59450 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032047894736842103, + "loss": 0.6343, + "step": 59460 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003204, + "loss": 0.6328, + "step": 59470 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003203210526315789, + "loss": 0.6272, + "step": 59480 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003202421052631579, + "loss": 0.6278, + "step": 59490 + }, + { + "epoch": 0.59, + "learning_rate": 0.00032016315789473677, + "loss": 0.6287, + "step": 59500 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003200842105263158, + "loss": 0.6277, + "step": 59510 + }, + { + "epoch": 0.6, + "learning_rate": 0.00032000526315789467, + "loss": 0.6342, + "step": 59520 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031992631578947367, + "loss": 0.6318, + "step": 59530 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003198473684210526, + "loss": 0.6406, + "step": 59540 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003197684210526315, + "loss": 0.6313, + "step": 59550 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003196894736842105, + "loss": 0.6356, + "step": 59560 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003196105263157894, + "loss": 0.6193, + "step": 59570 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003195315789473684, + "loss": 0.6304, + "step": 59580 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003194526315789473, + "loss": 0.6199, + "step": 59590 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003193736842105263, + "loss": 0.6353, + "step": 59600 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003192947368421052, + "loss": 0.613, + "step": 59610 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003192157894736842, + "loss": 0.6261, + "step": 59620 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031913684210526315, + "loss": 0.6297, + "step": 59630 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031905789473684205, + "loss": 0.6254, + "step": 59640 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031897894736842105, + "loss": 0.6246, + "step": 59650 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031889999999999995, + "loss": 0.6192, + "step": 59660 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031882105263157895, + "loss": 0.6128, + "step": 59670 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031874210526315784, + "loss": 0.6194, + "step": 59680 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031866315789473684, + "loss": 0.6397, + "step": 59690 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031858421052631574, + "loss": 0.635, + "step": 59700 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003185052631578947, + "loss": 0.6295, + "step": 59710 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003184263157894737, + "loss": 0.6401, + "step": 59720 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003183473684210526, + "loss": 0.6183, + "step": 59730 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031827631578947366, + "loss": 0.6335, + "step": 59740 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003181973684210526, + "loss": 0.6204, + "step": 59750 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003181184210526315, + "loss": 0.616, + "step": 59760 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003180394736842105, + "loss": 0.6197, + "step": 59770 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031796052631578945, + "loss": 0.6293, + "step": 59780 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003178815789473684, + "loss": 0.6145, + "step": 59790 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031780263157894735, + "loss": 0.6298, + "step": 59800 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003177236842105263, + "loss": 0.6333, + "step": 59810 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031764473684210524, + "loss": 0.6434, + "step": 59820 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031756578947368414, + "loss": 0.6417, + "step": 59830 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031748684210526314, + "loss": 0.637, + "step": 59840 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031740789473684204, + "loss": 0.6227, + "step": 59850 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031732894736842104, + "loss": 0.6152, + "step": 59860 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031725, + "loss": 0.633, + "step": 59870 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031717105263157893, + "loss": 0.6391, + "step": 59880 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003170921052631579, + "loss": 0.623, + "step": 59890 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003170131578947368, + "loss": 0.6313, + "step": 59900 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003169342105263158, + "loss": 0.6346, + "step": 59910 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003168552631578947, + "loss": 0.6354, + "step": 59920 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003167763157894737, + "loss": 0.6216, + "step": 59930 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031669736842105257, + "loss": 0.6221, + "step": 59940 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031661842105263157, + "loss": 0.613, + "step": 59950 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003165394736842105, + "loss": 0.6269, + "step": 59960 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003164605263157894, + "loss": 0.6301, + "step": 59970 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003163815789473684, + "loss": 0.6229, + "step": 59980 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003163026315789473, + "loss": 0.6208, + "step": 59990 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003162236842105263, + "loss": 0.6275, + "step": 60000 + }, + { + "epoch": 0.6, + "eval_accuracy": 0.8692195892541361, + "eval_loss": 0.61181640625, + "eval_runtime": 97.6328, + "eval_samples_per_second": 819.396, + "eval_steps_per_second": 1.608, + "step": 60000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003161447368421052, + "loss": 0.632, + "step": 60010 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003160657894736842, + "loss": 0.637, + "step": 60020 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003159868421052631, + "loss": 0.6263, + "step": 60030 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003159078947368421, + "loss": 0.6303, + "step": 60040 + }, + { + "epoch": 0.6, + "learning_rate": 0.000315828947368421, + "loss": 0.6338, + "step": 60050 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031574999999999995, + "loss": 0.6278, + "step": 60060 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031567105263157895, + "loss": 0.6364, + "step": 60070 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031559210526315785, + "loss": 0.6261, + "step": 60080 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031551315789473685, + "loss": 0.6224, + "step": 60090 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031543421052631574, + "loss": 0.6252, + "step": 60100 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031535526315789475, + "loss": 0.6245, + "step": 60110 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031527631578947364, + "loss": 0.6357, + "step": 60120 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003151973684210526, + "loss": 0.6277, + "step": 60130 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031511842105263154, + "loss": 0.6235, + "step": 60140 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003150394736842105, + "loss": 0.6275, + "step": 60150 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003149605263157895, + "loss": 0.625, + "step": 60160 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003148815789473684, + "loss": 0.6379, + "step": 60170 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003148026315789474, + "loss": 0.6249, + "step": 60180 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003147236842105263, + "loss": 0.633, + "step": 60190 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003146447368421052, + "loss": 0.6364, + "step": 60200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003145657894736842, + "loss": 0.6279, + "step": 60210 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003144868421052631, + "loss": 0.6266, + "step": 60220 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031440789473684207, + "loss": 0.6171, + "step": 60230 + }, + { + "epoch": 0.6, + "learning_rate": 0.000314328947368421, + "loss": 0.6098, + "step": 60240 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031425, + "loss": 0.6407, + "step": 60250 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003141710526315789, + "loss": 0.627, + "step": 60260 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031409210526315786, + "loss": 0.6257, + "step": 60270 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003140131578947368, + "loss": 0.618, + "step": 60280 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031393421052631576, + "loss": 0.6326, + "step": 60290 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003138552631578947, + "loss": 0.634, + "step": 60300 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031377631578947366, + "loss": 0.6198, + "step": 60310 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031369736842105255, + "loss": 0.6212, + "step": 60320 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031361842105263155, + "loss": 0.6295, + "step": 60330 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003135394736842105, + "loss": 0.6275, + "step": 60340 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031346052631578945, + "loss": 0.6225, + "step": 60350 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003133815789473684, + "loss": 0.6308, + "step": 60360 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031330263157894735, + "loss": 0.6313, + "step": 60370 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003132236842105263, + "loss": 0.6352, + "step": 60380 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003131447368421052, + "loss": 0.6164, + "step": 60390 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003130657894736842, + "loss": 0.623, + "step": 60400 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003129868421052631, + "loss": 0.6265, + "step": 60410 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003129078947368421, + "loss": 0.6195, + "step": 60420 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031282894736842104, + "loss": 0.6345, + "step": 60430 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031275, + "loss": 0.6402, + "step": 60440 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031267105263157893, + "loss": 0.6309, + "step": 60450 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031259210526315783, + "loss": 0.609, + "step": 60460 + }, + { + "epoch": 0.6, + "learning_rate": 0.00031251315789473683, + "loss": 0.6225, + "step": 60470 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003124342105263157, + "loss": 0.6204, + "step": 60480 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003123552631578947, + "loss": 0.6171, + "step": 60490 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003122763157894736, + "loss": 0.6381, + "step": 60500 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003121973684210526, + "loss": 0.6226, + "step": 60510 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031211842105263157, + "loss": 0.6345, + "step": 60520 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003120394736842105, + "loss": 0.6231, + "step": 60530 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031196052631578947, + "loss": 0.6288, + "step": 60540 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031188157894736836, + "loss": 0.6224, + "step": 60550 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031180263157894737, + "loss": 0.6392, + "step": 60560 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031172368421052626, + "loss": 0.6262, + "step": 60570 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031164473684210526, + "loss": 0.6266, + "step": 60580 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031156578947368416, + "loss": 0.6389, + "step": 60590 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031148684210526316, + "loss": 0.64, + "step": 60600 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031140789473684205, + "loss": 0.6286, + "step": 60610 + }, + { + "epoch": 0.61, + "learning_rate": 0.000311328947368421, + "loss": 0.6186, + "step": 60620 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031125, + "loss": 0.6166, + "step": 60630 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003111710526315789, + "loss": 0.6251, + "step": 60640 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003110921052631579, + "loss": 0.6116, + "step": 60650 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003110131578947368, + "loss": 0.6275, + "step": 60660 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003109342105263158, + "loss": 0.6184, + "step": 60670 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003108552631578947, + "loss": 0.6228, + "step": 60680 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031077631578947364, + "loss": 0.613, + "step": 60690 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003106973684210526, + "loss": 0.6364, + "step": 60700 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031061842105263154, + "loss": 0.6166, + "step": 60710 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031053947368421054, + "loss": 0.6254, + "step": 60720 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031046052631578943, + "loss": 0.6301, + "step": 60730 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031038157894736843, + "loss": 0.6314, + "step": 60740 + }, + { + "epoch": 0.61, + "learning_rate": 0.00031030263157894733, + "loss": 0.6388, + "step": 60750 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003102236842105263, + "loss": 0.6364, + "step": 60760 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003101447368421052, + "loss": 0.6293, + "step": 60770 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003100657894736842, + "loss": 0.6234, + "step": 60780 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003099868421052631, + "loss": 0.6111, + "step": 60790 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030990789473684207, + "loss": 0.6188, + "step": 60800 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030982894736842107, + "loss": 0.6154, + "step": 60810 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030974999999999997, + "loss": 0.6339, + "step": 60820 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003096710526315789, + "loss": 0.6194, + "step": 60830 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030959210526315786, + "loss": 0.6195, + "step": 60840 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003095131578947368, + "loss": 0.6186, + "step": 60850 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030943421052631576, + "loss": 0.6165, + "step": 60860 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003093552631578947, + "loss": 0.6314, + "step": 60870 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003092763157894736, + "loss": 0.6307, + "step": 60880 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003091973684210526, + "loss": 0.6341, + "step": 60890 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003091184210526316, + "loss": 0.6156, + "step": 60900 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003090394736842105, + "loss": 0.6256, + "step": 60910 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030896052631578945, + "loss": 0.63, + "step": 60920 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003088815789473684, + "loss": 0.632, + "step": 60930 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030880263157894735, + "loss": 0.6306, + "step": 60940 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030872368421052624, + "loss": 0.6324, + "step": 60950 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030864473684210524, + "loss": 0.6291, + "step": 60960 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030856578947368414, + "loss": 0.6204, + "step": 60970 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030848684210526314, + "loss": 0.6199, + "step": 60980 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003084078947368421, + "loss": 0.6295, + "step": 60990 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030832894736842104, + "loss": 0.6233, + "step": 61000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030825, + "loss": 0.6229, + "step": 61010 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030817105263157893, + "loss": 0.6079, + "step": 61020 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003080921052631579, + "loss": 0.6135, + "step": 61030 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003080131578947368, + "loss": 0.6028, + "step": 61040 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003079342105263158, + "loss": 0.6226, + "step": 61050 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030785526315789467, + "loss": 0.6309, + "step": 61060 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003077763157894737, + "loss": 0.6358, + "step": 61070 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003076973684210526, + "loss": 0.6278, + "step": 61080 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030761842105263157, + "loss": 0.6392, + "step": 61090 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003075394736842105, + "loss": 0.6313, + "step": 61100 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003074605263157894, + "loss": 0.6166, + "step": 61110 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003073815789473684, + "loss": 0.6287, + "step": 61120 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003073026315789473, + "loss": 0.6237, + "step": 61130 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003072236842105263, + "loss": 0.6303, + "step": 61140 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030715263157894733, + "loss": 0.6295, + "step": 61150 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030707368421052634, + "loss": 0.6245, + "step": 61160 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030699473684210523, + "loss": 0.6352, + "step": 61170 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003069157894736842, + "loss": 0.6223, + "step": 61180 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003068368421052631, + "loss": 0.6271, + "step": 61190 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003067578947368421, + "loss": 0.6274, + "step": 61200 + }, + { + "epoch": 0.61, + "learning_rate": 0.000306678947368421, + "loss": 0.6205, + "step": 61210 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030659999999999997, + "loss": 0.6145, + "step": 61220 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030652105263157887, + "loss": 0.6268, + "step": 61230 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030644210526315787, + "loss": 0.6302, + "step": 61240 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030636315789473687, + "loss": 0.6325, + "step": 61250 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030628421052631576, + "loss": 0.6452, + "step": 61260 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003062052631578947, + "loss": 0.6215, + "step": 61270 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030612631578947366, + "loss": 0.6305, + "step": 61280 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003060473684210526, + "loss": 0.6314, + "step": 61290 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003059684210526315, + "loss": 0.6411, + "step": 61300 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003058894736842105, + "loss": 0.639, + "step": 61310 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003058105263157894, + "loss": 0.6268, + "step": 61320 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003057315789473684, + "loss": 0.6194, + "step": 61330 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030565263157894735, + "loss": 0.6243, + "step": 61340 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003055736842105263, + "loss": 0.6305, + "step": 61350 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030549473684210525, + "loss": 0.6338, + "step": 61360 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003054157894736842, + "loss": 0.6212, + "step": 61370 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030533684210526314, + "loss": 0.622, + "step": 61380 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030525789473684204, + "loss": 0.6349, + "step": 61390 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030517894736842104, + "loss": 0.6355, + "step": 61400 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030509999999999994, + "loss": 0.6128, + "step": 61410 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030502105263157894, + "loss": 0.6311, + "step": 61420 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003049421052631579, + "loss": 0.6299, + "step": 61430 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030486315789473683, + "loss": 0.6296, + "step": 61440 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003047842105263158, + "loss": 0.6288, + "step": 61450 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003047052631578947, + "loss": 0.6322, + "step": 61460 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003046263157894737, + "loss": 0.642, + "step": 61470 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003045473684210526, + "loss": 0.6117, + "step": 61480 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003044684210526316, + "loss": 0.6149, + "step": 61490 + }, + { + "epoch": 0.61, + "learning_rate": 0.00030438947368421047, + "loss": 0.6307, + "step": 61500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030431052631578947, + "loss": 0.6317, + "step": 61510 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003042315789473684, + "loss": 0.6253, + "step": 61520 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003041526315789473, + "loss": 0.6289, + "step": 61530 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003040736842105263, + "loss": 0.6247, + "step": 61540 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003039947368421052, + "loss": 0.6246, + "step": 61550 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003039157894736842, + "loss": 0.6224, + "step": 61560 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003038368421052631, + "loss": 0.6232, + "step": 61570 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003037578947368421, + "loss": 0.632, + "step": 61580 + }, + { + "epoch": 0.62, + "learning_rate": 0.000303678947368421, + "loss": 0.6228, + "step": 61590 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030359999999999995, + "loss": 0.6365, + "step": 61600 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030352105263157896, + "loss": 0.632, + "step": 61610 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030344210526315785, + "loss": 0.6316, + "step": 61620 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030336315789473685, + "loss": 0.639, + "step": 61630 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030328421052631575, + "loss": 0.6287, + "step": 61640 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030320526315789475, + "loss": 0.622, + "step": 61650 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030312631578947364, + "loss": 0.6274, + "step": 61660 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003030473684210526, + "loss": 0.6117, + "step": 61670 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030296842105263154, + "loss": 0.6234, + "step": 61680 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003028894736842105, + "loss": 0.6216, + "step": 61690 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030281052631578944, + "loss": 0.6374, + "step": 61700 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003027315789473684, + "loss": 0.6377, + "step": 61710 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003026526315789474, + "loss": 0.6358, + "step": 61720 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003025736842105263, + "loss": 0.6196, + "step": 61730 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003024947368421053, + "loss": 0.6232, + "step": 61740 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003024157894736842, + "loss": 0.6274, + "step": 61750 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003023368421052631, + "loss": 0.6398, + "step": 61760 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003022578947368421, + "loss": 0.6381, + "step": 61770 + }, + { + "epoch": 0.62, + "learning_rate": 0.000302178947368421, + "loss": 0.6327, + "step": 61780 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003020999999999999, + "loss": 0.621, + "step": 61790 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003020210526315789, + "loss": 0.6335, + "step": 61800 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003019421052631579, + "loss": 0.6233, + "step": 61810 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003018631578947368, + "loss": 0.629, + "step": 61820 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030178421052631576, + "loss": 0.6417, + "step": 61830 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003017052631578947, + "loss": 0.627, + "step": 61840 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030162631578947366, + "loss": 0.6304, + "step": 61850 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003015473684210526, + "loss": 0.6376, + "step": 61860 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030146842105263156, + "loss": 0.6327, + "step": 61870 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030138947368421045, + "loss": 0.619, + "step": 61880 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030131052631578945, + "loss": 0.6299, + "step": 61890 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003012315789473684, + "loss": 0.631, + "step": 61900 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030115263157894735, + "loss": 0.6322, + "step": 61910 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003010736842105263, + "loss": 0.6164, + "step": 61920 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030099473684210525, + "loss": 0.621, + "step": 61930 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003009157894736842, + "loss": 0.631, + "step": 61940 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003008368421052631, + "loss": 0.6438, + "step": 61950 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003007578947368421, + "loss": 0.6347, + "step": 61960 + }, + { + "epoch": 0.62, + "learning_rate": 0.000300678947368421, + "loss": 0.634, + "step": 61970 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003006, + "loss": 0.6197, + "step": 61980 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030052105263157894, + "loss": 0.5923, + "step": 61990 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003004421052631579, + "loss": 0.6237, + "step": 62000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030036315789473683, + "loss": 0.6013, + "step": 62010 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030028421052631573, + "loss": 0.6049, + "step": 62020 + }, + { + "epoch": 0.62, + "learning_rate": 0.00030020526315789473, + "loss": 0.5971, + "step": 62030 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003001263157894736, + "loss": 0.6033, + "step": 62040 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003000473684210526, + "loss": 0.6055, + "step": 62050 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002999684210526316, + "loss": 0.6101, + "step": 62060 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002998894736842105, + "loss": 0.6141, + "step": 62070 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029981052631578947, + "loss": 0.6111, + "step": 62080 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029973157894736837, + "loss": 0.6125, + "step": 62090 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002996526315789473, + "loss": 0.6139, + "step": 62100 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029957368421052626, + "loss": 0.5965, + "step": 62110 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002994947368421052, + "loss": 0.6188, + "step": 62120 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002994157894736842, + "loss": 0.6104, + "step": 62130 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029933684210526316, + "loss": 0.6224, + "step": 62140 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002992578947368421, + "loss": 0.6328, + "step": 62150 + }, + { + "epoch": 0.62, + "learning_rate": 0.000299178947368421, + "loss": 0.6341, + "step": 62160 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029909999999999995, + "loss": 0.6263, + "step": 62170 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002990210526315789, + "loss": 0.6255, + "step": 62180 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029894210526315785, + "loss": 0.6184, + "step": 62190 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002988631578947368, + "loss": 0.6249, + "step": 62200 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029878421052631575, + "loss": 0.6208, + "step": 62210 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029870526315789475, + "loss": 0.6298, + "step": 62220 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002986263157894737, + "loss": 0.621, + "step": 62230 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002985473684210526, + "loss": 0.6203, + "step": 62240 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029846842105263154, + "loss": 0.6323, + "step": 62250 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002983894736842105, + "loss": 0.6298, + "step": 62260 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029831052631578943, + "loss": 0.6397, + "step": 62270 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002982315789473684, + "loss": 0.6296, + "step": 62280 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029815263157894733, + "loss": 0.6239, + "step": 62290 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002980736842105263, + "loss": 0.6316, + "step": 62300 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029799473684210523, + "loss": 0.6312, + "step": 62310 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002979157894736842, + "loss": 0.6491, + "step": 62320 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002978368421052631, + "loss": 0.6374, + "step": 62330 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002977578947368421, + "loss": 0.6282, + "step": 62340 + }, + { + "epoch": 0.62, + "learning_rate": 0.000297678947368421, + "loss": 0.6291, + "step": 62350 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029759999999999997, + "loss": 0.6272, + "step": 62360 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002975210526315789, + "loss": 0.6113, + "step": 62370 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029744210526315787, + "loss": 0.6234, + "step": 62380 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002973631578947368, + "loss": 0.6311, + "step": 62390 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029728421052631576, + "loss": 0.6298, + "step": 62400 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002972052631578947, + "loss": 0.6361, + "step": 62410 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029712631578947366, + "loss": 0.6314, + "step": 62420 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002970473684210526, + "loss": 0.6183, + "step": 62430 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029696842105263156, + "loss": 0.6201, + "step": 62440 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002968894736842105, + "loss": 0.6338, + "step": 62450 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029681052631578945, + "loss": 0.6498, + "step": 62460 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002967315789473684, + "loss": 0.6313, + "step": 62470 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029665263157894735, + "loss": 0.616, + "step": 62480 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002965736842105263, + "loss": 0.6071, + "step": 62490 + }, + { + "epoch": 0.62, + "learning_rate": 0.00029649473684210525, + "loss": 0.6063, + "step": 62500 + }, + { + "epoch": 0.62, + "eval_accuracy": 0.8706084189495512, + "eval_loss": 0.60107421875, + "eval_runtime": 97.0491, + "eval_samples_per_second": 824.325, + "eval_steps_per_second": 1.618, + "step": 62500 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002964157894736842, + "loss": 0.5934, + "step": 62510 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029633684210526314, + "loss": 0.6107, + "step": 62520 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002962578947368421, + "loss": 0.6027, + "step": 62530 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029617894736842104, + "loss": 0.6043, + "step": 62540 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002961, + "loss": 0.602, + "step": 62550 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029602105263157894, + "loss": 0.6035, + "step": 62560 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002959421052631579, + "loss": 0.6028, + "step": 62570 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002958631578947368, + "loss": 0.6066, + "step": 62580 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002957842105263158, + "loss": 0.5998, + "step": 62590 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029570526315789473, + "loss": 0.6178, + "step": 62600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002956263157894737, + "loss": 0.619, + "step": 62610 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002955473684210526, + "loss": 0.6166, + "step": 62620 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002954684210526316, + "loss": 0.6262, + "step": 62630 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002953894736842105, + "loss": 0.632, + "step": 62640 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002953105263157894, + "loss": 0.6159, + "step": 62650 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029523157894736836, + "loss": 0.6267, + "step": 62660 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002951526315789473, + "loss": 0.632, + "step": 62670 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029507368421052626, + "loss": 0.6263, + "step": 62680 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029499473684210526, + "loss": 0.6209, + "step": 62690 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002949157894736842, + "loss": 0.6242, + "step": 62700 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029483684210526316, + "loss": 0.6247, + "step": 62710 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002947578947368421, + "loss": 0.6175, + "step": 62720 + }, + { + "epoch": 0.63, + "learning_rate": 0.000294678947368421, + "loss": 0.6334, + "step": 62730 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029459999999999995, + "loss": 0.6237, + "step": 62740 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002945210526315789, + "loss": 0.6233, + "step": 62750 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029444210526315785, + "loss": 0.6153, + "step": 62760 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002943631578947368, + "loss": 0.6262, + "step": 62770 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002942842105263158, + "loss": 0.6317, + "step": 62780 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029420526315789475, + "loss": 0.6091, + "step": 62790 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029412631578947364, + "loss": 0.6173, + "step": 62800 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002940473684210526, + "loss": 0.6332, + "step": 62810 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029396842105263154, + "loss": 0.6217, + "step": 62820 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002938894736842105, + "loss": 0.623, + "step": 62830 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029381052631578943, + "loss": 0.6223, + "step": 62840 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002937315789473684, + "loss": 0.6251, + "step": 62850 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029365263157894733, + "loss": 0.6207, + "step": 62860 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002935736842105263, + "loss": 0.6127, + "step": 62870 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029349473684210523, + "loss": 0.6234, + "step": 62880 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002934157894736842, + "loss": 0.6109, + "step": 62890 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002933368421052631, + "loss": 0.6243, + "step": 62900 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029325789473684207, + "loss": 0.6196, + "step": 62910 + }, + { + "epoch": 0.63, + "learning_rate": 0.000293178947368421, + "loss": 0.6146, + "step": 62920 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029309999999999997, + "loss": 0.6201, + "step": 62930 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002930210526315789, + "loss": 0.6172, + "step": 62940 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029294210526315787, + "loss": 0.6087, + "step": 62950 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002928631578947368, + "loss": 0.6242, + "step": 62960 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029278421052631576, + "loss": 0.6154, + "step": 62970 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029271315789473684, + "loss": 0.6189, + "step": 62980 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002926342105263158, + "loss": 0.6134, + "step": 62990 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002925552631578947, + "loss": 0.6109, + "step": 63000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029247631578947363, + "loss": 0.6258, + "step": 63010 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002923973684210526, + "loss": 0.6197, + "step": 63020 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002923184210526316, + "loss": 0.615, + "step": 63030 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002922394736842105, + "loss": 0.6197, + "step": 63040 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002921605263157895, + "loss": 0.6159, + "step": 63050 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002920815789473684, + "loss": 0.6103, + "step": 63060 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002920026315789473, + "loss": 0.607, + "step": 63070 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029192368421052627, + "loss": 0.6194, + "step": 63080 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002918447368421052, + "loss": 0.6207, + "step": 63090 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029176578947368416, + "loss": 0.6235, + "step": 63100 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002916868421052631, + "loss": 0.6125, + "step": 63110 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002916078947368421, + "loss": 0.6209, + "step": 63120 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029152894736842106, + "loss": 0.6281, + "step": 63130 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029145, + "loss": 0.608, + "step": 63140 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002913710526315789, + "loss": 0.6118, + "step": 63150 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029129210526315785, + "loss": 0.6085, + "step": 63160 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002912131578947368, + "loss": 0.6079, + "step": 63170 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029113421052631575, + "loss": 0.6188, + "step": 63180 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002910552631578947, + "loss": 0.6259, + "step": 63190 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029097631578947365, + "loss": 0.6247, + "step": 63200 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002908973684210526, + "loss": 0.6283, + "step": 63210 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029081842105263154, + "loss": 0.6154, + "step": 63220 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002907394736842105, + "loss": 0.6136, + "step": 63230 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029066052631578944, + "loss": 0.6131, + "step": 63240 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002905815789473684, + "loss": 0.6044, + "step": 63250 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029050263157894734, + "loss": 0.6216, + "step": 63260 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002904236842105263, + "loss": 0.6183, + "step": 63270 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029034473684210523, + "loss": 0.6204, + "step": 63280 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002902657894736842, + "loss": 0.6173, + "step": 63290 + }, + { + "epoch": 0.63, + "learning_rate": 0.00029018684210526313, + "loss": 0.613, + "step": 63300 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002901078947368421, + "loss": 0.6297, + "step": 63310 + }, + { + "epoch": 0.63, + "learning_rate": 0.000290028947368421, + "loss": 0.6177, + "step": 63320 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028995, + "loss": 0.6246, + "step": 63330 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002898710526315789, + "loss": 0.6265, + "step": 63340 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028979210526315787, + "loss": 0.635, + "step": 63350 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002897131578947368, + "loss": 0.6188, + "step": 63360 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028963421052631577, + "loss": 0.618, + "step": 63370 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002895552631578947, + "loss": 0.6167, + "step": 63380 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028947631578947366, + "loss": 0.6207, + "step": 63390 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002893973684210526, + "loss": 0.6235, + "step": 63400 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028931842105263156, + "loss": 0.6006, + "step": 63410 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002892394736842105, + "loss": 0.6155, + "step": 63420 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028916052631578946, + "loss": 0.6181, + "step": 63430 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002890815789473684, + "loss": 0.6073, + "step": 63440 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028900263157894735, + "loss": 0.6012, + "step": 63450 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002889236842105263, + "loss": 0.6, + "step": 63460 + }, + { + "epoch": 0.63, + "learning_rate": 0.00028884473684210525, + "loss": 0.6143, + "step": 63470 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002887657894736842, + "loss": 0.6202, + "step": 63480 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002886868421052631, + "loss": 0.6072, + "step": 63490 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002886078947368421, + "loss": 0.6031, + "step": 63500 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028852894736842104, + "loss": 0.623, + "step": 63510 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028845, + "loss": 0.6119, + "step": 63520 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028837105263157894, + "loss": 0.6141, + "step": 63530 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002882921052631579, + "loss": 0.607, + "step": 63540 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028821315789473684, + "loss": 0.6221, + "step": 63550 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028813421052631573, + "loss": 0.623, + "step": 63560 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002880552631578947, + "loss": 0.6202, + "step": 63570 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002879763157894736, + "loss": 0.6273, + "step": 63580 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028789736842105263, + "loss": 0.6149, + "step": 63590 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002878184210526316, + "loss": 0.6185, + "step": 63600 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002877394736842105, + "loss": 0.5978, + "step": 63610 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002876605263157895, + "loss": 0.614, + "step": 63620 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002875815789473684, + "loss": 0.6211, + "step": 63630 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002875026315789473, + "loss": 0.6186, + "step": 63640 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028742368421052626, + "loss": 0.6133, + "step": 63650 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002873447368421052, + "loss": 0.6148, + "step": 63660 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028726578947368416, + "loss": 0.6075, + "step": 63670 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028718684210526316, + "loss": 0.6113, + "step": 63680 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002871078947368421, + "loss": 0.6121, + "step": 63690 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028702894736842106, + "loss": 0.6223, + "step": 63700 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028694999999999995, + "loss": 0.6098, + "step": 63710 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002868710526315789, + "loss": 0.6202, + "step": 63720 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028679210526315785, + "loss": 0.6097, + "step": 63730 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002867131578947368, + "loss": 0.6047, + "step": 63740 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028663421052631575, + "loss": 0.609, + "step": 63750 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002865552631578947, + "loss": 0.6166, + "step": 63760 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028647631578947364, + "loss": 0.6015, + "step": 63770 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002863973684210526, + "loss": 0.6228, + "step": 63780 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028631842105263154, + "loss": 0.6104, + "step": 63790 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002862394736842105, + "loss": 0.6093, + "step": 63800 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028616052631578944, + "loss": 0.6078, + "step": 63810 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002860815789473684, + "loss": 0.6127, + "step": 63820 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028600263157894733, + "loss": 0.6215, + "step": 63830 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002859236842105263, + "loss": 0.6187, + "step": 63840 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028584473684210523, + "loss": 0.6207, + "step": 63850 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002857657894736842, + "loss": 0.6077, + "step": 63860 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028568684210526313, + "loss": 0.6114, + "step": 63870 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002856078947368421, + "loss": 0.6312, + "step": 63880 + }, + { + "epoch": 0.64, + "learning_rate": 0.000285528947368421, + "loss": 0.6231, + "step": 63890 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028544999999999997, + "loss": 0.6297, + "step": 63900 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002853710526315789, + "loss": 0.6151, + "step": 63910 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028529210526315787, + "loss": 0.612, + "step": 63920 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002852131578947368, + "loss": 0.6184, + "step": 63930 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028513421052631577, + "loss": 0.6172, + "step": 63940 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002850552631578947, + "loss": 0.6244, + "step": 63950 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028497631578947366, + "loss": 0.6144, + "step": 63960 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002848973684210526, + "loss": 0.6161, + "step": 63970 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028481842105263156, + "loss": 0.6147, + "step": 63980 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002847394736842105, + "loss": 0.6128, + "step": 63990 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028466052631578946, + "loss": 0.6253, + "step": 64000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002845815789473684, + "loss": 0.6227, + "step": 64010 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028450263157894735, + "loss": 0.6294, + "step": 64020 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002844236842105263, + "loss": 0.6138, + "step": 64030 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028434473684210525, + "loss": 0.6185, + "step": 64040 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028426578947368414, + "loss": 0.6142, + "step": 64050 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028418684210526315, + "loss": 0.6274, + "step": 64060 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002841078947368421, + "loss": 0.6248, + "step": 64070 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028402894736842104, + "loss": 0.617, + "step": 64080 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028395, + "loss": 0.6273, + "step": 64090 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028387105263157894, + "loss": 0.6357, + "step": 64100 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002837921052631579, + "loss": 0.6099, + "step": 64110 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028371315789473684, + "loss": 0.6208, + "step": 64120 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028363421052631573, + "loss": 0.6179, + "step": 64130 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002835552631578947, + "loss": 0.6301, + "step": 64140 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002834763157894737, + "loss": 0.6158, + "step": 64150 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028339736842105263, + "loss": 0.627, + "step": 64160 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002833184210526316, + "loss": 0.6273, + "step": 64170 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002832394736842105, + "loss": 0.6132, + "step": 64180 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002831605263157895, + "loss": 0.6196, + "step": 64190 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028308157894736837, + "loss": 0.6202, + "step": 64200 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002830026315789473, + "loss": 0.6246, + "step": 64210 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028292368421052626, + "loss": 0.623, + "step": 64220 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002828447368421052, + "loss": 0.6155, + "step": 64230 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002827657894736842, + "loss": 0.6124, + "step": 64240 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028268684210526316, + "loss": 0.6046, + "step": 64250 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002826078947368421, + "loss": 0.6129, + "step": 64260 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028252894736842106, + "loss": 0.6272, + "step": 64270 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028244999999999995, + "loss": 0.6276, + "step": 64280 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002823710526315789, + "loss": 0.6191, + "step": 64290 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028229210526315785, + "loss": 0.5869, + "step": 64300 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002822131578947368, + "loss": 0.6192, + "step": 64310 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028213421052631575, + "loss": 0.6201, + "step": 64320 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028205526315789475, + "loss": 0.603, + "step": 64330 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002819763157894737, + "loss": 0.586, + "step": 64340 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002818973684210526, + "loss": 0.6202, + "step": 64350 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028181842105263154, + "loss": 0.6076, + "step": 64360 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002817394736842105, + "loss": 0.6187, + "step": 64370 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028166052631578944, + "loss": 0.6214, + "step": 64380 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002815815789473684, + "loss": 0.6157, + "step": 64390 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028150263157894733, + "loss": 0.6171, + "step": 64400 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002814236842105263, + "loss": 0.6168, + "step": 64410 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028134473684210523, + "loss": 0.6155, + "step": 64420 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002812657894736842, + "loss": 0.6266, + "step": 64430 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002811868421052631, + "loss": 0.6188, + "step": 64440 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002811078947368421, + "loss": 0.6228, + "step": 64450 + }, + { + "epoch": 0.64, + "learning_rate": 0.000281028947368421, + "loss": 0.6205, + "step": 64460 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028094999999999997, + "loss": 0.6278, + "step": 64470 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002808710526315789, + "loss": 0.6114, + "step": 64480 + }, + { + "epoch": 0.64, + "learning_rate": 0.00028079210526315787, + "loss": 0.6115, + "step": 64490 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002807131578947368, + "loss": 0.6262, + "step": 64500 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028063421052631576, + "loss": 0.6117, + "step": 64510 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002805552631578947, + "loss": 0.6196, + "step": 64520 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028047631578947366, + "loss": 0.6172, + "step": 64530 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002803973684210526, + "loss": 0.6063, + "step": 64540 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028031842105263156, + "loss": 0.6109, + "step": 64550 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002802394736842105, + "loss": 0.6074, + "step": 64560 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028016052631578945, + "loss": 0.6105, + "step": 64570 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002800815789473684, + "loss": 0.6199, + "step": 64580 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028000263157894735, + "loss": 0.6157, + "step": 64590 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002799236842105263, + "loss": 0.6174, + "step": 64600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027984473684210525, + "loss": 0.6083, + "step": 64610 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002797657894736842, + "loss": 0.6119, + "step": 64620 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027968684210526314, + "loss": 0.6272, + "step": 64630 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002796078947368421, + "loss": 0.6078, + "step": 64640 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027952894736842104, + "loss": 0.6267, + "step": 64650 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027945, + "loss": 0.622, + "step": 64660 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027937105263157894, + "loss": 0.6205, + "step": 64670 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002792921052631579, + "loss": 0.6193, + "step": 64680 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002792131578947368, + "loss": 0.6246, + "step": 64690 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027913421052631573, + "loss": 0.627, + "step": 64700 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027905526315789473, + "loss": 0.627, + "step": 64710 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002789763157894737, + "loss": 0.6091, + "step": 64720 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027889736842105263, + "loss": 0.6127, + "step": 64730 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002788184210526316, + "loss": 0.6177, + "step": 64740 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002787394736842105, + "loss": 0.6183, + "step": 64750 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027866052631578947, + "loss": 0.6197, + "step": 64760 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027858157894736837, + "loss": 0.6205, + "step": 64770 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002785026315789473, + "loss": 0.6174, + "step": 64780 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027842368421052626, + "loss": 0.621, + "step": 64790 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027834473684210527, + "loss": 0.6157, + "step": 64800 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002782657894736842, + "loss": 0.6121, + "step": 64810 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027818684210526316, + "loss": 0.6165, + "step": 64820 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002781078947368421, + "loss": 0.6229, + "step": 64830 + }, + { + "epoch": 0.65, + "learning_rate": 0.000278028947368421, + "loss": 0.6208, + "step": 64840 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027794999999999995, + "loss": 0.6162, + "step": 64850 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002778710526315789, + "loss": 0.6046, + "step": 64860 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027779210526315785, + "loss": 0.6047, + "step": 64870 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002777131578947368, + "loss": 0.6118, + "step": 64880 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002776342105263158, + "loss": 0.6132, + "step": 64890 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027755526315789475, + "loss": 0.6199, + "step": 64900 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027747631578947364, + "loss": 0.6303, + "step": 64910 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002773973684210526, + "loss": 0.6183, + "step": 64920 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027731842105263154, + "loss": 0.612, + "step": 64930 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002772394736842105, + "loss": 0.6118, + "step": 64940 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027716052631578944, + "loss": 0.6252, + "step": 64950 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002770815789473684, + "loss": 0.6116, + "step": 64960 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027700263157894733, + "loss": 0.6268, + "step": 64970 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002769236842105263, + "loss": 0.6126, + "step": 64980 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027685263157894736, + "loss": 0.6084, + "step": 64990 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002767736842105263, + "loss": 0.6029, + "step": 65000 + }, + { + "epoch": 0.65, + "eval_accuracy": 0.8712975328502192, + "eval_loss": 0.59521484375, + "eval_runtime": 97.298, + "eval_samples_per_second": 822.217, + "eval_steps_per_second": 1.614, + "step": 65000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027669473684210525, + "loss": 0.6275, + "step": 65010 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002766157894736842, + "loss": 0.6172, + "step": 65020 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027653684210526315, + "loss": 0.637, + "step": 65030 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027645789473684204, + "loss": 0.6185, + "step": 65040 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027637894736842105, + "loss": 0.6182, + "step": 65050 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002763, + "loss": 0.6237, + "step": 65060 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027622105263157894, + "loss": 0.6106, + "step": 65070 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002761421052631579, + "loss": 0.6254, + "step": 65080 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027606315789473684, + "loss": 0.6164, + "step": 65090 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002759842105263158, + "loss": 0.6195, + "step": 65100 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002759131578947368, + "loss": 0.6106, + "step": 65110 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027583421052631576, + "loss": 0.6329, + "step": 65120 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002757552631578947, + "loss": 0.6166, + "step": 65130 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027567631578947365, + "loss": 0.6229, + "step": 65140 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002755973684210526, + "loss": 0.6177, + "step": 65150 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027551842105263155, + "loss": 0.629, + "step": 65160 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002754394736842105, + "loss": 0.6317, + "step": 65170 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027536052631578945, + "loss": 0.6117, + "step": 65180 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002752815789473684, + "loss": 0.6122, + "step": 65190 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027520263157894734, + "loss": 0.6091, + "step": 65200 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002751236842105263, + "loss": 0.6138, + "step": 65210 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027504473684210524, + "loss": 0.6181, + "step": 65220 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002749657894736842, + "loss": 0.6213, + "step": 65230 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027488684210526314, + "loss": 0.6174, + "step": 65240 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002748078947368421, + "loss": 0.6162, + "step": 65250 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027472894736842103, + "loss": 0.6247, + "step": 65260 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027465, + "loss": 0.6168, + "step": 65270 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027457105263157893, + "loss": 0.6246, + "step": 65280 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002744921052631579, + "loss": 0.6223, + "step": 65290 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002744131578947368, + "loss": 0.6196, + "step": 65300 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002743342105263158, + "loss": 0.6196, + "step": 65310 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002742552631578947, + "loss": 0.6128, + "step": 65320 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027417631578947367, + "loss": 0.6262, + "step": 65330 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002740973684210526, + "loss": 0.6173, + "step": 65340 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027401842105263157, + "loss": 0.6169, + "step": 65350 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002739394736842105, + "loss": 0.6241, + "step": 65360 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027386052631578946, + "loss": 0.6195, + "step": 65370 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027378157894736836, + "loss": 0.6215, + "step": 65380 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002737026315789473, + "loss": 0.62, + "step": 65390 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002736236842105263, + "loss": 0.6165, + "step": 65400 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027354473684210526, + "loss": 0.6089, + "step": 65410 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002734657894736842, + "loss": 0.6126, + "step": 65420 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027338684210526315, + "loss": 0.6049, + "step": 65430 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002733078947368421, + "loss": 0.6083, + "step": 65440 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027322894736842105, + "loss": 0.6204, + "step": 65450 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027314999999999994, + "loss": 0.6178, + "step": 65460 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002730710526315789, + "loss": 0.6159, + "step": 65470 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027299210526315784, + "loss": 0.6279, + "step": 65480 + }, + { + "epoch": 0.65, + "learning_rate": 0.00027291315789473684, + "loss": 0.6057, + "step": 65490 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002728342105263158, + "loss": 0.6107, + "step": 65500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027275526315789474, + "loss": 0.6107, + "step": 65510 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002726763157894737, + "loss": 0.6217, + "step": 65520 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002725973684210526, + "loss": 0.6232, + "step": 65530 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027251842105263153, + "loss": 0.6323, + "step": 65540 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002724394736842105, + "loss": 0.6193, + "step": 65550 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027236052631578943, + "loss": 0.6173, + "step": 65560 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002722815789473684, + "loss": 0.6192, + "step": 65570 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002722026315789474, + "loss": 0.6125, + "step": 65580 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002721236842105263, + "loss": 0.6193, + "step": 65590 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002720447368421053, + "loss": 0.6226, + "step": 65600 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027196578947368417, + "loss": 0.6216, + "step": 65610 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002718868421052631, + "loss": 0.614, + "step": 65620 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027180789473684207, + "loss": 0.6163, + "step": 65630 + }, + { + "epoch": 0.66, + "learning_rate": 0.000271728947368421, + "loss": 0.622, + "step": 65640 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027164999999999996, + "loss": 0.6128, + "step": 65650 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002715710526315789, + "loss": 0.6167, + "step": 65660 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027149210526315786, + "loss": 0.6153, + "step": 65670 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002714131578947368, + "loss": 0.6106, + "step": 65680 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027133421052631576, + "loss": 0.6077, + "step": 65690 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002712552631578947, + "loss": 0.618, + "step": 65700 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027117631578947365, + "loss": 0.6081, + "step": 65710 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002710973684210526, + "loss": 0.627, + "step": 65720 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027101842105263155, + "loss": 0.6084, + "step": 65730 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002709394736842105, + "loss": 0.6128, + "step": 65740 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027086052631578945, + "loss": 0.6034, + "step": 65750 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002707815789473684, + "loss": 0.6198, + "step": 65760 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027070263157894734, + "loss": 0.6161, + "step": 65770 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002706236842105263, + "loss": 0.6204, + "step": 65780 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027054473684210524, + "loss": 0.6147, + "step": 65790 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002704657894736842, + "loss": 0.6048, + "step": 65800 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027038684210526314, + "loss": 0.6058, + "step": 65810 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002703078947368421, + "loss": 0.6189, + "step": 65820 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027022894736842103, + "loss": 0.6136, + "step": 65830 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027015, + "loss": 0.6153, + "step": 65840 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027007105263157893, + "loss": 0.6257, + "step": 65850 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002699921052631579, + "loss": 0.6279, + "step": 65860 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002699131578947368, + "loss": 0.6163, + "step": 65870 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002698342105263158, + "loss": 0.6127, + "step": 65880 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002697552631578947, + "loss": 0.6032, + "step": 65890 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026967631578947367, + "loss": 0.6238, + "step": 65900 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002695973684210526, + "loss": 0.6083, + "step": 65910 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026951842105263157, + "loss": 0.6041, + "step": 65920 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002694394736842105, + "loss": 0.6112, + "step": 65930 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026936052631578946, + "loss": 0.6085, + "step": 65940 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026928157894736836, + "loss": 0.6115, + "step": 65950 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026920263157894736, + "loss": 0.6172, + "step": 65960 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002691236842105263, + "loss": 0.6136, + "step": 65970 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026904473684210526, + "loss": 0.6114, + "step": 65980 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002689657894736842, + "loss": 0.606, + "step": 65990 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026888684210526315, + "loss": 0.6049, + "step": 66000 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002688078947368421, + "loss": 0.6122, + "step": 66010 + }, + { + "epoch": 0.66, + "learning_rate": 0.000268728947368421, + "loss": 0.613, + "step": 66020 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026864999999999994, + "loss": 0.6181, + "step": 66030 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002685710526315789, + "loss": 0.6076, + "step": 66040 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002684921052631579, + "loss": 0.6094, + "step": 66050 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026841315789473684, + "loss": 0.6061, + "step": 66060 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002683342105263158, + "loss": 0.6089, + "step": 66070 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026825526315789474, + "loss": 0.6199, + "step": 66080 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002681763157894737, + "loss": 0.6147, + "step": 66090 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002680973684210526, + "loss": 0.6188, + "step": 66100 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026801842105263153, + "loss": 0.6109, + "step": 66110 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002679394736842105, + "loss": 0.6093, + "step": 66120 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002678605263157894, + "loss": 0.617, + "step": 66130 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026778157894736843, + "loss": 0.6152, + "step": 66140 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002677026315789474, + "loss": 0.6077, + "step": 66150 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002676236842105263, + "loss": 0.6152, + "step": 66160 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002675447368421052, + "loss": 0.6165, + "step": 66170 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026746578947368417, + "loss": 0.6205, + "step": 66180 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002673868421052631, + "loss": 0.6121, + "step": 66190 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026730789473684206, + "loss": 0.6138, + "step": 66200 + }, + { + "epoch": 0.66, + "learning_rate": 0.000267228947368421, + "loss": 0.6088, + "step": 66210 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026714999999999996, + "loss": 0.6114, + "step": 66220 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002670710526315789, + "loss": 0.6249, + "step": 66230 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026699210526315786, + "loss": 0.6134, + "step": 66240 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002669131578947368, + "loss": 0.6137, + "step": 66250 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026683421052631575, + "loss": 0.6173, + "step": 66260 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002667552631578947, + "loss": 0.6057, + "step": 66270 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026667631578947365, + "loss": 0.6091, + "step": 66280 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002665973684210526, + "loss": 0.6195, + "step": 66290 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026651842105263155, + "loss": 0.6172, + "step": 66300 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002664394736842105, + "loss": 0.6113, + "step": 66310 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026636052631578944, + "loss": 0.6148, + "step": 66320 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002662815789473684, + "loss": 0.605, + "step": 66330 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026620263157894734, + "loss": 0.6161, + "step": 66340 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002661236842105263, + "loss": 0.6172, + "step": 66350 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026604473684210524, + "loss": 0.6171, + "step": 66360 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002659657894736842, + "loss": 0.609, + "step": 66370 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026588684210526313, + "loss": 0.6061, + "step": 66380 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002658078947368421, + "loss": 0.5975, + "step": 66390 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026572894736842103, + "loss": 0.6149, + "step": 66400 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026565, + "loss": 0.623, + "step": 66410 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026557105263157893, + "loss": 0.607, + "step": 66420 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002654921052631579, + "loss": 0.6125, + "step": 66430 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002654131578947368, + "loss": 0.6125, + "step": 66440 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026533421052631577, + "loss": 0.5879, + "step": 66450 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002652552631578947, + "loss": 0.6151, + "step": 66460 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026517631578947367, + "loss": 0.6237, + "step": 66470 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002650973684210526, + "loss": 0.6168, + "step": 66480 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026501842105263157, + "loss": 0.6005, + "step": 66490 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002649394736842105, + "loss": 0.6086, + "step": 66500 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002648605263157894, + "loss": 0.6099, + "step": 66510 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002647815789473684, + "loss": 0.6189, + "step": 66520 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026470263157894736, + "loss": 0.6127, + "step": 66530 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002646236842105263, + "loss": 0.6078, + "step": 66540 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026454473684210526, + "loss": 0.6057, + "step": 66550 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002644657894736842, + "loss": 0.6142, + "step": 66560 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026438684210526315, + "loss": 0.6087, + "step": 66570 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002643078947368421, + "loss": 0.6079, + "step": 66580 + }, + { + "epoch": 0.67, + "learning_rate": 0.000264228947368421, + "loss": 0.5868, + "step": 66590 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026414999999999994, + "loss": 0.5994, + "step": 66600 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026407105263157895, + "loss": 0.5836, + "step": 66610 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002639921052631579, + "loss": 0.6064, + "step": 66620 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002639210526315789, + "loss": 0.6007, + "step": 66630 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026384210526315786, + "loss": 0.5975, + "step": 66640 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002637631578947368, + "loss": 0.5916, + "step": 66650 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026368421052631576, + "loss": 0.6027, + "step": 66660 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002636052631578947, + "loss": 0.6034, + "step": 66670 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026352631578947366, + "loss": 0.6089, + "step": 66680 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002634473684210526, + "loss": 0.5924, + "step": 66690 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026336842105263155, + "loss": 0.5966, + "step": 66700 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002632894736842105, + "loss": 0.59, + "step": 66710 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026321052631578945, + "loss": 0.5982, + "step": 66720 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002631315789473684, + "loss": 0.5976, + "step": 66730 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026305263157894735, + "loss": 0.608, + "step": 66740 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002629736842105263, + "loss": 0.6017, + "step": 66750 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026289473684210524, + "loss": 0.5987, + "step": 66760 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002628157894736842, + "loss": 0.5861, + "step": 66770 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026273684210526314, + "loss": 0.5971, + "step": 66780 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002626578947368421, + "loss": 0.5958, + "step": 66790 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026257894736842104, + "loss": 0.596, + "step": 66800 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002625, + "loss": 0.5842, + "step": 66810 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026242105263157893, + "loss": 0.5935, + "step": 66820 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002623421052631579, + "loss": 0.5913, + "step": 66830 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026226315789473683, + "loss": 0.6111, + "step": 66840 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002621842105263158, + "loss": 0.6131, + "step": 66850 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002621052631578947, + "loss": 0.6299, + "step": 66860 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002620263157894737, + "loss": 0.6226, + "step": 66870 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002619473684210526, + "loss": 0.5992, + "step": 66880 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026186842105263157, + "loss": 0.6188, + "step": 66890 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002617894736842105, + "loss": 0.6228, + "step": 66900 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026171052631578947, + "loss": 0.6228, + "step": 66910 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002616315789473684, + "loss": 0.622, + "step": 66920 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002615526315789473, + "loss": 0.6124, + "step": 66930 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026147368421052626, + "loss": 0.6236, + "step": 66940 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026139473684210526, + "loss": 0.6076, + "step": 66950 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002613157894736842, + "loss": 0.6119, + "step": 66960 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026123684210526316, + "loss": 0.6109, + "step": 66970 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002611578947368421, + "loss": 0.618, + "step": 66980 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026107894736842105, + "loss": 0.6172, + "step": 66990 + }, + { + "epoch": 0.67, + "learning_rate": 0.000261, + "loss": 0.6186, + "step": 67000 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002609210526315789, + "loss": 0.6134, + "step": 67010 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026084210526315784, + "loss": 0.6014, + "step": 67020 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002607631578947368, + "loss": 0.6121, + "step": 67030 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026068421052631574, + "loss": 0.6131, + "step": 67040 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026060526315789474, + "loss": 0.6212, + "step": 67050 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002605263157894737, + "loss": 0.6212, + "step": 67060 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026044736842105264, + "loss": 0.6098, + "step": 67070 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026036842105263153, + "loss": 0.6035, + "step": 67080 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002602894736842105, + "loss": 0.5951, + "step": 67090 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026021052631578943, + "loss": 0.5969, + "step": 67100 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002601315789473684, + "loss": 0.5876, + "step": 67110 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002600526315789473, + "loss": 0.6134, + "step": 67120 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002599736842105263, + "loss": 0.6154, + "step": 67130 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002598947368421053, + "loss": 0.6118, + "step": 67140 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002598157894736842, + "loss": 0.6091, + "step": 67150 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002597368421052631, + "loss": 0.6192, + "step": 67160 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025965789473684207, + "loss": 0.6088, + "step": 67170 + }, + { + "epoch": 0.67, + "learning_rate": 0.000259578947368421, + "loss": 0.6051, + "step": 67180 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025949999999999997, + "loss": 0.6081, + "step": 67190 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002594210526315789, + "loss": 0.6192, + "step": 67200 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025934210526315786, + "loss": 0.6138, + "step": 67210 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002592631578947368, + "loss": 0.6073, + "step": 67220 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025918421052631576, + "loss": 0.6092, + "step": 67230 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002591052631578947, + "loss": 0.6033, + "step": 67240 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025902631578947365, + "loss": 0.5994, + "step": 67250 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002589473684210526, + "loss": 0.6172, + "step": 67260 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025886842105263155, + "loss": 0.6103, + "step": 67270 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002587894736842105, + "loss": 0.6128, + "step": 67280 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025871052631578945, + "loss": 0.6112, + "step": 67290 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002586315789473684, + "loss": 0.6225, + "step": 67300 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025855263157894734, + "loss": 0.6106, + "step": 67310 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002584736842105263, + "loss": 0.6076, + "step": 67320 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025839473684210524, + "loss": 0.61, + "step": 67330 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002583157894736842, + "loss": 0.6145, + "step": 67340 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025823684210526314, + "loss": 0.5988, + "step": 67350 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002581578947368421, + "loss": 0.6034, + "step": 67360 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025807894736842103, + "loss": 0.6168, + "step": 67370 + }, + { + "epoch": 0.67, + "learning_rate": 0.000258, + "loss": 0.6039, + "step": 67380 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025792105263157893, + "loss": 0.6013, + "step": 67390 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002578421052631579, + "loss": 0.6092, + "step": 67400 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025776315789473683, + "loss": 0.6197, + "step": 67410 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002576842105263158, + "loss": 0.6197, + "step": 67420 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002576052631578947, + "loss": 0.6105, + "step": 67430 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025752631578947367, + "loss": 0.6155, + "step": 67440 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002574473684210526, + "loss": 0.6026, + "step": 67450 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025736842105263157, + "loss": 0.6058, + "step": 67460 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002572894736842105, + "loss": 0.6007, + "step": 67470 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025721052631578947, + "loss": 0.6056, + "step": 67480 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002571315789473684, + "loss": 0.6256, + "step": 67490 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002570526315789473, + "loss": 0.615, + "step": 67500 + }, + { + "epoch": 0.68, + "eval_accuracy": 0.8707361782979045, + "eval_loss": 0.5947265625, + "eval_runtime": 97.3705, + "eval_samples_per_second": 821.604, + "eval_steps_per_second": 1.612, + "step": 67500 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002569736842105263, + "loss": 0.6038, + "step": 67510 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025689473684210526, + "loss": 0.6058, + "step": 67520 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002568157894736842, + "loss": 0.6052, + "step": 67530 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025673684210526316, + "loss": 0.6114, + "step": 67540 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002566578947368421, + "loss": 0.606, + "step": 67550 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025657894736842105, + "loss": 0.6142, + "step": 67560 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025649999999999995, + "loss": 0.6073, + "step": 67570 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002564210526315789, + "loss": 0.6219, + "step": 67580 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025634210526315784, + "loss": 0.6162, + "step": 67590 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002562631578947368, + "loss": 0.6199, + "step": 67600 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002561842105263158, + "loss": 0.6224, + "step": 67610 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025610526315789474, + "loss": 0.6154, + "step": 67620 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002560263157894737, + "loss": 0.6133, + "step": 67630 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025594736842105264, + "loss": 0.6166, + "step": 67640 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025586842105263153, + "loss": 0.6177, + "step": 67650 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002557894736842105, + "loss": 0.607, + "step": 67660 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025571052631578943, + "loss": 0.6188, + "step": 67670 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002556315789473684, + "loss": 0.6005, + "step": 67680 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002555526315789473, + "loss": 0.6192, + "step": 67690 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025547368421052633, + "loss": 0.614, + "step": 67700 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002553947368421053, + "loss": 0.6191, + "step": 67710 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025531578947368417, + "loss": 0.6169, + "step": 67720 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002552368421052631, + "loss": 0.6028, + "step": 67730 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025515789473684207, + "loss": 0.6043, + "step": 67740 + }, + { + "epoch": 0.68, + "learning_rate": 0.000255078947368421, + "loss": 0.6143, + "step": 67750 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025499999999999996, + "loss": 0.6207, + "step": 67760 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002549210526315789, + "loss": 0.6236, + "step": 67770 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025484210526315786, + "loss": 0.6054, + "step": 67780 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002547631578947368, + "loss": 0.5998, + "step": 67790 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025468421052631576, + "loss": 0.6129, + "step": 67800 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002546052631578947, + "loss": 0.6191, + "step": 67810 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025452631578947365, + "loss": 0.627, + "step": 67820 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002544473684210526, + "loss": 0.6209, + "step": 67830 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025436842105263155, + "loss": 0.6149, + "step": 67840 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002542894736842105, + "loss": 0.6252, + "step": 67850 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025421052631578945, + "loss": 0.6138, + "step": 67860 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002541315789473684, + "loss": 0.6198, + "step": 67870 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025405263157894734, + "loss": 0.6133, + "step": 67880 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002539736842105263, + "loss": 0.62, + "step": 67890 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025389473684210524, + "loss": 0.6147, + "step": 67900 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002538157894736842, + "loss": 0.6097, + "step": 67910 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025373684210526314, + "loss": 0.6146, + "step": 67920 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025366578947368416, + "loss": 0.6189, + "step": 67930 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002535868421052631, + "loss": 0.6189, + "step": 67940 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002535078947368421, + "loss": 0.6292, + "step": 67950 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025342894736842106, + "loss": 0.6081, + "step": 67960 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025335, + "loss": 0.6154, + "step": 67970 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025327105263157895, + "loss": 0.6009, + "step": 67980 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025319210526315785, + "loss": 0.6056, + "step": 67990 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002531131578947368, + "loss": 0.6165, + "step": 68000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025303421052631574, + "loss": 0.6294, + "step": 68010 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002529552631578947, + "loss": 0.6219, + "step": 68020 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025287631578947364, + "loss": 0.6237, + "step": 68030 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002527973684210526, + "loss": 0.6049, + "step": 68040 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002527184210526316, + "loss": 0.6195, + "step": 68050 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025263947368421054, + "loss": 0.6178, + "step": 68060 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025256052631578943, + "loss": 0.6155, + "step": 68070 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002524815789473684, + "loss": 0.6085, + "step": 68080 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025240263157894733, + "loss": 0.6197, + "step": 68090 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002523236842105263, + "loss": 0.61, + "step": 68100 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025224473684210523, + "loss": 0.6132, + "step": 68110 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002521657894736842, + "loss": 0.6193, + "step": 68120 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002520868421052631, + "loss": 0.604, + "step": 68130 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025200789473684207, + "loss": 0.6106, + "step": 68140 + }, + { + "epoch": 0.68, + "learning_rate": 0.000251928947368421, + "loss": 0.6148, + "step": 68150 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025184999999999997, + "loss": 0.5906, + "step": 68160 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002517710526315789, + "loss": 0.6127, + "step": 68170 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025169210526315787, + "loss": 0.6092, + "step": 68180 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002516131578947368, + "loss": 0.6262, + "step": 68190 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025153421052631576, + "loss": 0.618, + "step": 68200 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002514552631578947, + "loss": 0.6237, + "step": 68210 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025137631578947366, + "loss": 0.6087, + "step": 68220 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002512973684210526, + "loss": 0.6156, + "step": 68230 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025121842105263156, + "loss": 0.6084, + "step": 68240 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002511394736842105, + "loss": 0.6167, + "step": 68250 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025106052631578945, + "loss": 0.616, + "step": 68260 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002509815789473684, + "loss": 0.6084, + "step": 68270 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025090263157894735, + "loss": 0.6154, + "step": 68280 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002508236842105263, + "loss": 0.6127, + "step": 68290 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025074473684210525, + "loss": 0.6065, + "step": 68300 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002506657894736842, + "loss": 0.6116, + "step": 68310 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025058684210526314, + "loss": 0.6053, + "step": 68320 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002505078947368421, + "loss": 0.6185, + "step": 68330 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025042894736842104, + "loss": 0.6165, + "step": 68340 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025035, + "loss": 0.6125, + "step": 68350 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025027105263157893, + "loss": 0.6134, + "step": 68360 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002501921052631579, + "loss": 0.6174, + "step": 68370 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025011315789473683, + "loss": 0.6202, + "step": 68380 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002500342105263158, + "loss": 0.6204, + "step": 68390 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024995526315789473, + "loss": 0.6053, + "step": 68400 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002498763157894736, + "loss": 0.6128, + "step": 68410 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002497973684210526, + "loss": 0.6161, + "step": 68420 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002497184210526316, + "loss": 0.6068, + "step": 68430 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002496394736842105, + "loss": 0.6023, + "step": 68440 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024956052631578947, + "loss": 0.6008, + "step": 68450 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002494815789473684, + "loss": 0.6074, + "step": 68460 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024940263157894737, + "loss": 0.5798, + "step": 68470 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024932368421052626, + "loss": 0.603, + "step": 68480 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002492447368421052, + "loss": 0.6085, + "step": 68490 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024916578947368416, + "loss": 0.6058, + "step": 68500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024908684210526316, + "loss": 0.6086, + "step": 68510 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002490078947368421, + "loss": 0.6189, + "step": 68520 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024892894736842106, + "loss": 0.609, + "step": 68530 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024885, + "loss": 0.6023, + "step": 68540 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024877105263157895, + "loss": 0.6105, + "step": 68550 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024869210526315785, + "loss": 0.6157, + "step": 68560 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002486131578947368, + "loss": 0.6098, + "step": 68570 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024853421052631574, + "loss": 0.6157, + "step": 68580 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002484552631578947, + "loss": 0.6039, + "step": 68590 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002483763157894737, + "loss": 0.6003, + "step": 68600 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024829736842105264, + "loss": 0.597, + "step": 68610 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002482184210526316, + "loss": 0.6065, + "step": 68620 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002481394736842105, + "loss": 0.6053, + "step": 68630 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024806052631578943, + "loss": 0.612, + "step": 68640 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002479815789473684, + "loss": 0.6069, + "step": 68650 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024790263157894733, + "loss": 0.6088, + "step": 68660 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002478236842105263, + "loss": 0.6081, + "step": 68670 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002477447368421052, + "loss": 0.6058, + "step": 68680 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002476657894736842, + "loss": 0.6156, + "step": 68690 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002475868421052632, + "loss": 0.6117, + "step": 68700 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024750789473684207, + "loss": 0.6123, + "step": 68710 + }, + { + "epoch": 0.69, + "learning_rate": 0.000247428947368421, + "loss": 0.611, + "step": 68720 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024734999999999997, + "loss": 0.6127, + "step": 68730 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002472710526315789, + "loss": 0.6079, + "step": 68740 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024719210526315786, + "loss": 0.6086, + "step": 68750 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002471131578947368, + "loss": 0.6038, + "step": 68760 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024703421052631576, + "loss": 0.6027, + "step": 68770 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002469552631578947, + "loss": 0.604, + "step": 68780 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024687631578947366, + "loss": 0.6101, + "step": 68790 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002467973684210526, + "loss": 0.5935, + "step": 68800 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024671842105263155, + "loss": 0.613, + "step": 68810 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002466394736842105, + "loss": 0.6102, + "step": 68820 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024656052631578945, + "loss": 0.6134, + "step": 68830 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002464815789473684, + "loss": 0.6165, + "step": 68840 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024640263157894735, + "loss": 0.6066, + "step": 68850 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002463236842105263, + "loss": 0.5881, + "step": 68860 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024624473684210524, + "loss": 0.6132, + "step": 68870 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002461657894736842, + "loss": 0.6242, + "step": 68880 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024608684210526314, + "loss": 0.6087, + "step": 68890 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002460078947368421, + "loss": 0.5976, + "step": 68900 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024592894736842104, + "loss": 0.5987, + "step": 68910 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024585, + "loss": 0.6161, + "step": 68920 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024577105263157893, + "loss": 0.6109, + "step": 68930 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002456921052631579, + "loss": 0.6189, + "step": 68940 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024561315789473683, + "loss": 0.6157, + "step": 68950 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002455342105263158, + "loss": 0.6059, + "step": 68960 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002454552631578947, + "loss": 0.6033, + "step": 68970 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002453763157894737, + "loss": 0.6102, + "step": 68980 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002452973684210526, + "loss": 0.6003, + "step": 68990 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024521842105263157, + "loss": 0.6109, + "step": 69000 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002451394736842105, + "loss": 0.604, + "step": 69010 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024506052631578947, + "loss": 0.6056, + "step": 69020 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002449815789473684, + "loss": 0.6088, + "step": 69030 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024490263157894737, + "loss": 0.5902, + "step": 69040 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024482368421052626, + "loss": 0.6112, + "step": 69050 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002447447368421052, + "loss": 0.611, + "step": 69060 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002446657894736842, + "loss": 0.6094, + "step": 69070 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024458684210526316, + "loss": 0.6069, + "step": 69080 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002445078947368421, + "loss": 0.6074, + "step": 69090 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024442894736842106, + "loss": 0.6131, + "step": 69100 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024435, + "loss": 0.5869, + "step": 69110 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002442710526315789, + "loss": 0.6104, + "step": 69120 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024419210526315785, + "loss": 0.6049, + "step": 69130 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024411315789473682, + "loss": 0.6167, + "step": 69140 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024403421052631574, + "loss": 0.6033, + "step": 69150 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024395526315789474, + "loss": 0.6039, + "step": 69160 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024387631578947367, + "loss": 0.5829, + "step": 69170 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024379736842105261, + "loss": 0.5959, + "step": 69180 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024371842105263156, + "loss": 0.6027, + "step": 69190 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002436394736842105, + "loss": 0.6057, + "step": 69200 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024356052631578946, + "loss": 0.5997, + "step": 69210 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002434815789473684, + "loss": 0.6002, + "step": 69220 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024340263157894733, + "loss": 0.5976, + "step": 69230 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024332368421052628, + "loss": 0.607, + "step": 69240 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024324473684210523, + "loss": 0.6073, + "step": 69250 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002431657894736842, + "loss": 0.5946, + "step": 69260 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024308684210526315, + "loss": 0.6051, + "step": 69270 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002430078947368421, + "loss": 0.598, + "step": 69280 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024292894736842105, + "loss": 0.602, + "step": 69290 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024284999999999997, + "loss": 0.5844, + "step": 69300 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024277105263157892, + "loss": 0.6098, + "step": 69310 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024269210526315786, + "loss": 0.6077, + "step": 69320 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002426131578947368, + "loss": 0.6136, + "step": 69330 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024253421052631576, + "loss": 0.6044, + "step": 69340 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024245526315789474, + "loss": 0.6037, + "step": 69350 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024237631578947368, + "loss": 0.6013, + "step": 69360 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002422973684210526, + "loss": 0.5935, + "step": 69370 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024221842105263155, + "loss": 0.6034, + "step": 69380 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002421394736842105, + "loss": 0.6045, + "step": 69390 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024206052631578945, + "loss": 0.5987, + "step": 69400 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002419815789473684, + "loss": 0.6052, + "step": 69410 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024190263157894735, + "loss": 0.6117, + "step": 69420 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024182368421052627, + "loss": 0.5942, + "step": 69430 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024174473684210527, + "loss": 0.5926, + "step": 69440 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002416657894736842, + "loss": 0.605, + "step": 69450 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024158684210526314, + "loss": 0.5998, + "step": 69460 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002415078947368421, + "loss": 0.6041, + "step": 69470 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024142894736842104, + "loss": 0.5895, + "step": 69480 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024134999999999998, + "loss": 0.5974, + "step": 69490 + }, + { + "epoch": 0.69, + "learning_rate": 0.00024127105263157893, + "loss": 0.5932, + "step": 69500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024119210526315785, + "loss": 0.6043, + "step": 69510 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002411131578947368, + "loss": 0.605, + "step": 69520 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024103421052631575, + "loss": 0.6021, + "step": 69530 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024095526315789473, + "loss": 0.5918, + "step": 69540 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024087631578947367, + "loss": 0.5874, + "step": 69550 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024079736842105262, + "loss": 0.5916, + "step": 69560 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024071842105263157, + "loss": 0.6072, + "step": 69570 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002406394736842105, + "loss": 0.5992, + "step": 69580 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024056052631578944, + "loss": 0.5925, + "step": 69590 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002404815789473684, + "loss": 0.5935, + "step": 69600 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024040263157894734, + "loss": 0.5945, + "step": 69610 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024032368421052629, + "loss": 0.6028, + "step": 69620 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024024473684210526, + "loss": 0.5978, + "step": 69630 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002401657894736842, + "loss": 0.6066, + "step": 69640 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024008684210526316, + "loss": 0.5957, + "step": 69650 + }, + { + "epoch": 0.7, + "learning_rate": 0.00024000789473684208, + "loss": 0.5925, + "step": 69660 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023992894736842103, + "loss": 0.592, + "step": 69670 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023984999999999998, + "loss": 0.5931, + "step": 69680 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023977105263157892, + "loss": 0.607, + "step": 69690 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023969210526315787, + "loss": 0.5953, + "step": 69700 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023961315789473682, + "loss": 0.5925, + "step": 69710 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002395342105263158, + "loss": 0.6155, + "step": 69720 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023945526315789472, + "loss": 0.6088, + "step": 69730 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023937631578947367, + "loss": 0.5937, + "step": 69740 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002392973684210526, + "loss": 0.6061, + "step": 69750 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023921842105263156, + "loss": 0.6011, + "step": 69760 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002391394736842105, + "loss": 0.6094, + "step": 69770 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023906052631578946, + "loss": 0.6065, + "step": 69780 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023898157894736838, + "loss": 0.5998, + "step": 69790 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023890263157894733, + "loss": 0.6013, + "step": 69800 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023882368421052628, + "loss": 0.6027, + "step": 69810 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023874473684210525, + "loss": 0.6078, + "step": 69820 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002386657894736842, + "loss": 0.6176, + "step": 69830 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023858684210526315, + "loss": 0.6084, + "step": 69840 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002385078947368421, + "loss": 0.605, + "step": 69850 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023842894736842102, + "loss": 0.6058, + "step": 69860 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023834999999999997, + "loss": 0.5942, + "step": 69870 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023827105263157891, + "loss": 0.5972, + "step": 69880 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023819210526315786, + "loss": 0.5919, + "step": 69890 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002381131578947368, + "loss": 0.5953, + "step": 69900 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023803421052631579, + "loss": 0.5834, + "step": 69910 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023795526315789473, + "loss": 0.5894, + "step": 69920 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023787631578947368, + "loss": 0.5811, + "step": 69930 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002378052631578947, + "loss": 0.6117, + "step": 69940 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023772631578947365, + "loss": 0.6008, + "step": 69950 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002376473684210526, + "loss": 0.6081, + "step": 69960 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023756842105263158, + "loss": 0.6041, + "step": 69970 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023748947368421052, + "loss": 0.6165, + "step": 69980 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023741052631578947, + "loss": 0.5945, + "step": 69990 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002373315789473684, + "loss": 0.6078, + "step": 70000 + }, + { + "epoch": 0.7, + "eval_accuracy": 0.8734435544718635, + "eval_loss": 0.583984375, + "eval_runtime": 98.2973, + "eval_samples_per_second": 813.857, + "eval_steps_per_second": 1.597, + "step": 70000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023725263157894734, + "loss": 0.5939, + "step": 70010 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002371736842105263, + "loss": 0.6039, + "step": 70020 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023709473684210524, + "loss": 0.5968, + "step": 70030 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002370157894736842, + "loss": 0.5757, + "step": 70040 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023693684210526314, + "loss": 0.5833, + "step": 70050 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023685789473684206, + "loss": 0.5848, + "step": 70060 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023677894736842106, + "loss": 0.6055, + "step": 70070 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023669999999999998, + "loss": 0.6161, + "step": 70080 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023662105263157893, + "loss": 0.5991, + "step": 70090 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023654210526315788, + "loss": 0.6033, + "step": 70100 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023646315789473682, + "loss": 0.6065, + "step": 70110 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023638421052631577, + "loss": 0.6057, + "step": 70120 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023630526315789472, + "loss": 0.6078, + "step": 70130 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023622631578947364, + "loss": 0.6102, + "step": 70140 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002361473684210526, + "loss": 0.6025, + "step": 70150 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023606842105263157, + "loss": 0.5977, + "step": 70160 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023598947368421051, + "loss": 0.6034, + "step": 70170 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023591052631578946, + "loss": 0.6022, + "step": 70180 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002358315789473684, + "loss": 0.6029, + "step": 70190 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023575263157894736, + "loss": 0.5929, + "step": 70200 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023567368421052628, + "loss": 0.6114, + "step": 70210 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023559473684210523, + "loss": 0.6118, + "step": 70220 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023551578947368418, + "loss": 0.6014, + "step": 70230 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023543684210526313, + "loss": 0.595, + "step": 70240 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002353578947368421, + "loss": 0.5955, + "step": 70250 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023527894736842105, + "loss": 0.6039, + "step": 70260 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002352, + "loss": 0.6257, + "step": 70270 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023512105263157892, + "loss": 0.6124, + "step": 70280 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023504210526315787, + "loss": 0.6083, + "step": 70290 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023496315789473682, + "loss": 0.6003, + "step": 70300 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023488421052631576, + "loss": 0.6006, + "step": 70310 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002348052631578947, + "loss": 0.5982, + "step": 70320 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023472631578947366, + "loss": 0.6025, + "step": 70330 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023464736842105258, + "loss": 0.6044, + "step": 70340 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023456842105263158, + "loss": 0.5941, + "step": 70350 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002344894736842105, + "loss": 0.597, + "step": 70360 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023441052631578945, + "loss": 0.5882, + "step": 70370 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002343315789473684, + "loss": 0.5973, + "step": 70380 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023425263157894735, + "loss": 0.5996, + "step": 70390 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002341736842105263, + "loss": 0.6067, + "step": 70400 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023409473684210525, + "loss": 0.6058, + "step": 70410 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023401578947368417, + "loss": 0.6062, + "step": 70420 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023393684210526312, + "loss": 0.6052, + "step": 70430 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002338578947368421, + "loss": 0.5923, + "step": 70440 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023377894736842104, + "loss": 0.6033, + "step": 70450 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002337, + "loss": 0.6068, + "step": 70460 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023362105263157894, + "loss": 0.5843, + "step": 70470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023354210526315789, + "loss": 0.5965, + "step": 70480 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002334631578947368, + "loss": 0.5903, + "step": 70490 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023338421052631575, + "loss": 0.6136, + "step": 70500 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002333052631578947, + "loss": 0.6037, + "step": 70510 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023322631578947365, + "loss": 0.613, + "step": 70520 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023314736842105263, + "loss": 0.6103, + "step": 70530 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023306842105263157, + "loss": 0.5961, + "step": 70540 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023298947368421052, + "loss": 0.6017, + "step": 70550 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023291052631578947, + "loss": 0.6015, + "step": 70560 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002328315789473684, + "loss": 0.6025, + "step": 70570 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023275263157894734, + "loss": 0.5949, + "step": 70580 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002326736842105263, + "loss": 0.6025, + "step": 70590 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023259473684210524, + "loss": 0.6016, + "step": 70600 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023251578947368419, + "loss": 0.5956, + "step": 70610 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023243684210526313, + "loss": 0.597, + "step": 70620 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002323578947368421, + "loss": 0.6034, + "step": 70630 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023227894736842103, + "loss": 0.5926, + "step": 70640 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023219999999999998, + "loss": 0.5916, + "step": 70650 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023212105263157893, + "loss": 0.5967, + "step": 70660 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023204210526315788, + "loss": 0.6008, + "step": 70670 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023196315789473682, + "loss": 0.5852, + "step": 70680 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023188421052631577, + "loss": 0.5923, + "step": 70690 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002318052631578947, + "loss": 0.5933, + "step": 70700 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023172631578947364, + "loss": 0.6051, + "step": 70710 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023164736842105262, + "loss": 0.5979, + "step": 70720 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023156842105263157, + "loss": 0.5987, + "step": 70730 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023148947368421051, + "loss": 0.5959, + "step": 70740 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023141052631578946, + "loss": 0.5904, + "step": 70750 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002313315789473684, + "loss": 0.6042, + "step": 70760 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023125263157894733, + "loss": 0.6005, + "step": 70770 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023117368421052628, + "loss": 0.6, + "step": 70780 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023109473684210523, + "loss": 0.5935, + "step": 70790 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023101578947368418, + "loss": 0.608, + "step": 70800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023093684210526315, + "loss": 0.6092, + "step": 70810 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002308578947368421, + "loss": 0.5978, + "step": 70820 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023077894736842105, + "loss": 0.6018, + "step": 70830 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002307, + "loss": 0.6036, + "step": 70840 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023062105263157892, + "loss": 0.6035, + "step": 70850 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023054210526315787, + "loss": 0.5986, + "step": 70860 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023046315789473681, + "loss": 0.6031, + "step": 70870 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023038421052631576, + "loss": 0.5977, + "step": 70880 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002303052631578947, + "loss": 0.6025, + "step": 70890 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023022631578947366, + "loss": 0.594, + "step": 70900 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023014736842105263, + "loss": 0.614, + "step": 70910 + }, + { + "epoch": 0.71, + "learning_rate": 0.00023006842105263156, + "loss": 0.5989, + "step": 70920 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002299894736842105, + "loss": 0.595, + "step": 70930 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022991842105263155, + "loss": 0.5981, + "step": 70940 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002298394736842105, + "loss": 0.5895, + "step": 70950 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022976052631578945, + "loss": 0.6078, + "step": 70960 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022968157894736842, + "loss": 0.6083, + "step": 70970 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022960263157894737, + "loss": 0.6098, + "step": 70980 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002295236842105263, + "loss": 0.5887, + "step": 70990 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022944473684210524, + "loss": 0.5875, + "step": 71000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002293657894736842, + "loss": 0.6078, + "step": 71010 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022928684210526314, + "loss": 0.5973, + "step": 71020 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002292078947368421, + "loss": 0.6166, + "step": 71030 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022912894736842104, + "loss": 0.6056, + "step": 71040 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022904999999999996, + "loss": 0.5935, + "step": 71050 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002289710526315789, + "loss": 0.5915, + "step": 71060 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022889210526315788, + "loss": 0.6014, + "step": 71070 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022881315789473683, + "loss": 0.601, + "step": 71080 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022873421052631578, + "loss": 0.6017, + "step": 71090 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022865526315789473, + "loss": 0.6092, + "step": 71100 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022857631578947367, + "loss": 0.6009, + "step": 71110 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002284973684210526, + "loss": 0.5998, + "step": 71120 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022841842105263154, + "loss": 0.5892, + "step": 71130 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002283394736842105, + "loss": 0.5822, + "step": 71140 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022826052631578944, + "loss": 0.5975, + "step": 71150 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022818157894736842, + "loss": 0.617, + "step": 71160 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022810263157894736, + "loss": 0.5986, + "step": 71170 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002280236842105263, + "loss": 0.5952, + "step": 71180 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022794473684210526, + "loss": 0.6105, + "step": 71190 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022786578947368418, + "loss": 0.6043, + "step": 71200 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022778684210526313, + "loss": 0.6116, + "step": 71210 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022770789473684208, + "loss": 0.6148, + "step": 71220 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022762894736842103, + "loss": 0.611, + "step": 71230 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022754999999999997, + "loss": 0.6016, + "step": 71240 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022747105263157895, + "loss": 0.6035, + "step": 71250 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002273921052631579, + "loss": 0.5876, + "step": 71260 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022731315789473682, + "loss": 0.5937, + "step": 71270 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022723421052631577, + "loss": 0.5963, + "step": 71280 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022715526315789472, + "loss": 0.5965, + "step": 71290 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022707631578947366, + "loss": 0.5937, + "step": 71300 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002269973684210526, + "loss": 0.5938, + "step": 71310 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022691842105263156, + "loss": 0.5797, + "step": 71320 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022683947368421048, + "loss": 0.5936, + "step": 71330 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022676052631578946, + "loss": 0.5935, + "step": 71340 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002266815789473684, + "loss": 0.5929, + "step": 71350 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022660263157894735, + "loss": 0.5987, + "step": 71360 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002265236842105263, + "loss": 0.5836, + "step": 71370 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022644473684210525, + "loss": 0.576, + "step": 71380 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002263657894736842, + "loss": 0.5896, + "step": 71390 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022628684210526312, + "loss": 0.5827, + "step": 71400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022620789473684207, + "loss": 0.593, + "step": 71410 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022612894736842102, + "loss": 0.5809, + "step": 71420 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022604999999999997, + "loss": 0.5905, + "step": 71430 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022597105263157894, + "loss": 0.5885, + "step": 71440 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002258921052631579, + "loss": 0.5924, + "step": 71450 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022581315789473684, + "loss": 0.5871, + "step": 71460 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022573421052631579, + "loss": 0.5978, + "step": 71470 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002256552631578947, + "loss": 0.5868, + "step": 71480 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022557631578947365, + "loss": 0.596, + "step": 71490 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002254973684210526, + "loss": 0.5952, + "step": 71500 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022541842105263155, + "loss": 0.5963, + "step": 71510 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002253394736842105, + "loss": 0.6128, + "step": 71520 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022526052631578948, + "loss": 0.6038, + "step": 71530 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022518157894736842, + "loss": 0.6014, + "step": 71540 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022510263157894734, + "loss": 0.6037, + "step": 71550 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002250236842105263, + "loss": 0.6062, + "step": 71560 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022494473684210524, + "loss": 0.5995, + "step": 71570 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002248657894736842, + "loss": 0.6024, + "step": 71580 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022478684210526314, + "loss": 0.599, + "step": 71590 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022470789473684209, + "loss": 0.6094, + "step": 71600 + }, + { + "epoch": 0.72, + "learning_rate": 0.000224628947368421, + "loss": 0.6053, + "step": 71610 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022455, + "loss": 0.5928, + "step": 71620 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022447105263157893, + "loss": 0.6068, + "step": 71630 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022439210526315788, + "loss": 0.602, + "step": 71640 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022431315789473683, + "loss": 0.6049, + "step": 71650 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022423421052631578, + "loss": 0.6079, + "step": 71660 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022415526315789472, + "loss": 0.5928, + "step": 71670 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022407631578947367, + "loss": 0.5994, + "step": 71680 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002239973684210526, + "loss": 0.597, + "step": 71690 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022391842105263154, + "loss": 0.6096, + "step": 71700 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002238394736842105, + "loss": 0.6095, + "step": 71710 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022376052631578947, + "loss": 0.6015, + "step": 71720 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022368157894736841, + "loss": 0.6118, + "step": 71730 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022360263157894736, + "loss": 0.6141, + "step": 71740 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002235236842105263, + "loss": 0.6042, + "step": 71750 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022344473684210523, + "loss": 0.608, + "step": 71760 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022336578947368418, + "loss": 0.6134, + "step": 71770 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022328684210526313, + "loss": 0.6184, + "step": 71780 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022320789473684208, + "loss": 0.6086, + "step": 71790 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022312894736842103, + "loss": 0.6171, + "step": 71800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022305, + "loss": 0.6145, + "step": 71810 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022297105263157895, + "loss": 0.6011, + "step": 71820 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022289210526315787, + "loss": 0.6098, + "step": 71830 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022281315789473682, + "loss": 0.6053, + "step": 71840 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022273421052631577, + "loss": 0.6126, + "step": 71850 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022265526315789472, + "loss": 0.5937, + "step": 71860 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022257631578947366, + "loss": 0.6048, + "step": 71870 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002224973684210526, + "loss": 0.6046, + "step": 71880 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022241842105263153, + "loss": 0.6002, + "step": 71890 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022233947368421054, + "loss": 0.5936, + "step": 71900 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022226052631578946, + "loss": 0.6053, + "step": 71910 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002221815789473684, + "loss": 0.5948, + "step": 71920 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022210263157894735, + "loss": 0.5856, + "step": 71930 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002220315789473684, + "loss": 0.5845, + "step": 71940 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022195263157894735, + "loss": 0.5855, + "step": 71950 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022187368421052627, + "loss": 0.5879, + "step": 71960 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022179473684210525, + "loss": 0.582, + "step": 71970 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002217157894736842, + "loss": 0.5874, + "step": 71980 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022163684210526314, + "loss": 0.5876, + "step": 71990 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002215578947368421, + "loss": 0.5806, + "step": 72000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022147894736842104, + "loss": 0.5831, + "step": 72010 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002214, + "loss": 0.58, + "step": 72020 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002213210526315789, + "loss": 0.5838, + "step": 72030 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022124210526315786, + "loss": 0.5866, + "step": 72040 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002211631578947368, + "loss": 0.5803, + "step": 72050 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022108421052631578, + "loss": 0.5848, + "step": 72060 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022100526315789473, + "loss": 0.5816, + "step": 72070 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022092631578947368, + "loss": 0.5852, + "step": 72080 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022084736842105263, + "loss": 0.5868, + "step": 72090 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022076842105263157, + "loss": 0.5966, + "step": 72100 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002206894736842105, + "loss": 0.5947, + "step": 72110 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022061052631578944, + "loss": 0.5872, + "step": 72120 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002205315789473684, + "loss": 0.5674, + "step": 72130 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022045263157894734, + "loss": 0.5797, + "step": 72140 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002203736842105263, + "loss": 0.6056, + "step": 72150 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022029473684210526, + "loss": 0.5937, + "step": 72160 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002202157894736842, + "loss": 0.5895, + "step": 72170 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022013684210526313, + "loss": 0.5959, + "step": 72180 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022005789473684208, + "loss": 0.5931, + "step": 72190 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021997894736842103, + "loss": 0.5929, + "step": 72200 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021989999999999998, + "loss": 0.586, + "step": 72210 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021982105263157893, + "loss": 0.5836, + "step": 72220 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021974210526315787, + "loss": 0.6077, + "step": 72230 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002196631578947368, + "loss": 0.5922, + "step": 72240 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021958421052631577, + "loss": 0.597, + "step": 72250 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021950526315789472, + "loss": 0.5902, + "step": 72260 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021942631578947367, + "loss": 0.5983, + "step": 72270 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021934736842105262, + "loss": 0.5861, + "step": 72280 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021926842105263156, + "loss": 0.6096, + "step": 72290 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002191894736842105, + "loss": 0.6019, + "step": 72300 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021911052631578943, + "loss": 0.6009, + "step": 72310 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021903157894736838, + "loss": 0.5955, + "step": 72320 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021895263157894733, + "loss": 0.5925, + "step": 72330 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002188736842105263, + "loss": 0.6106, + "step": 72340 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021879473684210525, + "loss": 0.6014, + "step": 72350 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002187157894736842, + "loss": 0.5975, + "step": 72360 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021863684210526315, + "loss": 0.595, + "step": 72370 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002185578947368421, + "loss": 0.5984, + "step": 72380 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021847894736842102, + "loss": 0.5999, + "step": 72390 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021839999999999997, + "loss": 0.5956, + "step": 72400 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021832105263157892, + "loss": 0.5924, + "step": 72410 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021824210526315787, + "loss": 0.5907, + "step": 72420 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021816315789473681, + "loss": 0.5842, + "step": 72430 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002180842105263158, + "loss": 0.5978, + "step": 72440 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021800526315789474, + "loss": 0.5884, + "step": 72450 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021792631578947366, + "loss": 0.5898, + "step": 72460 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002178473684210526, + "loss": 0.5941, + "step": 72470 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021776842105263156, + "loss": 0.6033, + "step": 72480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002176894736842105, + "loss": 0.6142, + "step": 72490 + }, + { + "epoch": 0.72, + "learning_rate": 0.00021761052631578945, + "loss": 0.5965, + "step": 72500 + }, + { + "epoch": 0.72, + "eval_accuracy": 0.8749469840503773, + "eval_loss": 0.576171875, + "eval_runtime": 99.6694, + "eval_samples_per_second": 802.653, + "eval_steps_per_second": 1.575, + "step": 72500 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002175315789473684, + "loss": 0.5961, + "step": 72510 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021745263157894732, + "loss": 0.6001, + "step": 72520 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021737368421052632, + "loss": 0.6151, + "step": 72530 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021729473684210525, + "loss": 0.6152, + "step": 72540 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002172157894736842, + "loss": 0.5974, + "step": 72550 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021713684210526314, + "loss": 0.6027, + "step": 72560 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002170578947368421, + "loss": 0.6109, + "step": 72570 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021697894736842104, + "loss": 0.5968, + "step": 72580 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002169, + "loss": 0.6037, + "step": 72590 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002168210526315789, + "loss": 0.5886, + "step": 72600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021674210526315786, + "loss": 0.5981, + "step": 72610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021666315789473683, + "loss": 0.6046, + "step": 72620 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021658421052631578, + "loss": 0.5896, + "step": 72630 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021650526315789473, + "loss": 0.5975, + "step": 72640 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021642631578947368, + "loss": 0.5955, + "step": 72650 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021634736842105262, + "loss": 0.592, + "step": 72660 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021626842105263155, + "loss": 0.6062, + "step": 72670 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002161894736842105, + "loss": 0.596, + "step": 72680 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021611052631578944, + "loss": 0.6005, + "step": 72690 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002160315789473684, + "loss": 0.6064, + "step": 72700 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021595263157894737, + "loss": 0.5907, + "step": 72710 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021587368421052631, + "loss": 0.5921, + "step": 72720 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021579473684210526, + "loss": 0.6098, + "step": 72730 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021571578947368418, + "loss": 0.5913, + "step": 72740 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021563684210526313, + "loss": 0.5979, + "step": 72750 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021555789473684208, + "loss": 0.6038, + "step": 72760 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021547894736842103, + "loss": 0.6031, + "step": 72770 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021539999999999998, + "loss": 0.6046, + "step": 72780 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021532105263157893, + "loss": 0.6048, + "step": 72790 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021524210526315785, + "loss": 0.5917, + "step": 72800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021516315789473685, + "loss": 0.5912, + "step": 72810 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021508421052631577, + "loss": 0.6047, + "step": 72820 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021500526315789472, + "loss": 0.5936, + "step": 72830 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021492631578947367, + "loss": 0.5931, + "step": 72840 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021484736842105262, + "loss": 0.5977, + "step": 72850 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021476842105263156, + "loss": 0.6, + "step": 72860 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002146894736842105, + "loss": 0.5975, + "step": 72870 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021461052631578943, + "loss": 0.5967, + "step": 72880 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021453157894736838, + "loss": 0.6096, + "step": 72890 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021445263157894736, + "loss": 0.5931, + "step": 72900 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002143736842105263, + "loss": 0.5879, + "step": 72910 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021429473684210525, + "loss": 0.5894, + "step": 72920 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002142157894736842, + "loss": 0.5986, + "step": 72930 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021414473684210522, + "loss": 0.585, + "step": 72940 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021406578947368417, + "loss": 0.5892, + "step": 72950 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021398684210526312, + "loss": 0.5951, + "step": 72960 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002139078947368421, + "loss": 0.596, + "step": 72970 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021382894736842104, + "loss": 0.596, + "step": 72980 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021375, + "loss": 0.5977, + "step": 72990 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021367105263157894, + "loss": 0.593, + "step": 73000 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002135921052631579, + "loss": 0.5901, + "step": 73010 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002135131578947368, + "loss": 0.5919, + "step": 73020 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021343421052631576, + "loss": 0.5984, + "step": 73030 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002133552631578947, + "loss": 0.597, + "step": 73040 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021327631578947365, + "loss": 0.6008, + "step": 73050 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021319736842105263, + "loss": 0.5879, + "step": 73060 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021311842105263158, + "loss": 0.5941, + "step": 73070 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021303947368421053, + "loss": 0.588, + "step": 73080 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021296052631578945, + "loss": 0.6014, + "step": 73090 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002128815789473684, + "loss": 0.6001, + "step": 73100 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021280263157894734, + "loss": 0.5957, + "step": 73110 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002127236842105263, + "loss": 0.5876, + "step": 73120 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021264473684210524, + "loss": 0.59, + "step": 73130 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002125657894736842, + "loss": 0.6049, + "step": 73140 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021248684210526316, + "loss": 0.597, + "step": 73150 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002124078947368421, + "loss": 0.604, + "step": 73160 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021232894736842103, + "loss": 0.6002, + "step": 73170 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021224999999999998, + "loss": 0.5974, + "step": 73180 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021217105263157893, + "loss": 0.5976, + "step": 73190 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021209210526315788, + "loss": 0.5933, + "step": 73200 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021201315789473683, + "loss": 0.5826, + "step": 73210 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021193421052631578, + "loss": 0.5848, + "step": 73220 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002118552631578947, + "loss": 0.5892, + "step": 73230 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021177631578947364, + "loss": 0.5816, + "step": 73240 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021169736842105262, + "loss": 0.5683, + "step": 73250 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021161842105263157, + "loss": 0.5969, + "step": 73260 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021154736842105262, + "loss": 0.5983, + "step": 73270 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021146842105263156, + "loss": 0.5935, + "step": 73280 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021138947368421049, + "loss": 0.6056, + "step": 73290 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021131052631578943, + "loss": 0.5983, + "step": 73300 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002112315789473684, + "loss": 0.6023, + "step": 73310 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021115263157894736, + "loss": 0.591, + "step": 73320 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002110736842105263, + "loss": 0.5914, + "step": 73330 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021099473684210525, + "loss": 0.5944, + "step": 73340 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002109157894736842, + "loss": 0.5988, + "step": 73350 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021083684210526312, + "loss": 0.587, + "step": 73360 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021075789473684207, + "loss": 0.5785, + "step": 73370 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021067894736842102, + "loss": 0.6024, + "step": 73380 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021059999999999997, + "loss": 0.5973, + "step": 73390 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021052105263157892, + "loss": 0.5994, + "step": 73400 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002104421052631579, + "loss": 0.5964, + "step": 73410 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021036315789473684, + "loss": 0.5854, + "step": 73420 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002102842105263158, + "loss": 0.5947, + "step": 73430 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002102052631578947, + "loss": 0.5925, + "step": 73440 + }, + { + "epoch": 0.73, + "learning_rate": 0.00021012631578947366, + "loss": 0.5833, + "step": 73450 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002100473684210526, + "loss": 0.5844, + "step": 73460 + }, + { + "epoch": 0.73, + "learning_rate": 0.00020996842105263156, + "loss": 0.5915, + "step": 73470 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002098894736842105, + "loss": 0.5919, + "step": 73480 + }, + { + "epoch": 0.73, + "learning_rate": 0.00020981052631578945, + "loss": 0.5949, + "step": 73490 + }, + { + "epoch": 0.73, + "learning_rate": 0.00020973157894736843, + "loss": 0.5937, + "step": 73500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020965263157894735, + "loss": 0.5894, + "step": 73510 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002095736842105263, + "loss": 0.5986, + "step": 73520 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020949473684210525, + "loss": 0.5838, + "step": 73530 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002094157894736842, + "loss": 0.5831, + "step": 73540 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020933684210526314, + "loss": 0.5878, + "step": 73550 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002092578947368421, + "loss": 0.5914, + "step": 73560 + }, + { + "epoch": 0.74, + "learning_rate": 0.000209178947368421, + "loss": 0.5783, + "step": 73570 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020909999999999996, + "loss": 0.599, + "step": 73580 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020902105263157894, + "loss": 0.59, + "step": 73590 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020894210526315788, + "loss": 0.5932, + "step": 73600 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020886315789473683, + "loss": 0.5887, + "step": 73610 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020878421052631578, + "loss": 0.5971, + "step": 73620 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020870526315789473, + "loss": 0.5933, + "step": 73630 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020862631578947368, + "loss": 0.5931, + "step": 73640 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002085473684210526, + "loss": 0.5976, + "step": 73650 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020846842105263155, + "loss": 0.5878, + "step": 73660 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002083894736842105, + "loss": 0.5901, + "step": 73670 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020831052631578944, + "loss": 0.5919, + "step": 73680 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020823157894736842, + "loss": 0.6052, + "step": 73690 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020815263157894737, + "loss": 0.5924, + "step": 73700 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020807368421052631, + "loss": 0.5885, + "step": 73710 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020799473684210524, + "loss": 0.596, + "step": 73720 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020791578947368418, + "loss": 0.5957, + "step": 73730 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020783684210526313, + "loss": 0.5959, + "step": 73740 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020775789473684208, + "loss": 0.5915, + "step": 73750 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020767894736842103, + "loss": 0.5849, + "step": 73760 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020759999999999998, + "loss": 0.6014, + "step": 73770 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020752105263157895, + "loss": 0.5915, + "step": 73780 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020744210526315787, + "loss": 0.5988, + "step": 73790 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020736315789473682, + "loss": 0.5941, + "step": 73800 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020728421052631577, + "loss": 0.5902, + "step": 73810 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020720526315789472, + "loss": 0.5847, + "step": 73820 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020712631578947367, + "loss": 0.5897, + "step": 73830 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020704736842105262, + "loss": 0.5903, + "step": 73840 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020696842105263154, + "loss": 0.592, + "step": 73850 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020688947368421049, + "loss": 0.5881, + "step": 73860 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020681052631578946, + "loss": 0.5812, + "step": 73870 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002067315789473684, + "loss": 0.5876, + "step": 73880 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020665263157894736, + "loss": 0.5995, + "step": 73890 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002065736842105263, + "loss": 0.5869, + "step": 73900 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020649473684210525, + "loss": 0.5843, + "step": 73910 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002064157894736842, + "loss": 0.5949, + "step": 73920 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020633684210526312, + "loss": 0.6077, + "step": 73930 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020625789473684207, + "loss": 0.5957, + "step": 73940 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020617894736842102, + "loss": 0.589, + "step": 73950 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002061, + "loss": 0.6004, + "step": 73960 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020602105263157894, + "loss": 0.596, + "step": 73970 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002059421052631579, + "loss": 0.5931, + "step": 73980 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020586315789473684, + "loss": 0.5855, + "step": 73990 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020578421052631576, + "loss": 0.5928, + "step": 74000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002057052631578947, + "loss": 0.5871, + "step": 74010 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020562631578947366, + "loss": 0.597, + "step": 74020 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002055473684210526, + "loss": 0.5806, + "step": 74030 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020546842105263155, + "loss": 0.5913, + "step": 74040 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002053894736842105, + "loss": 0.587, + "step": 74050 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020531052631578948, + "loss": 0.5958, + "step": 74060 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020523157894736843, + "loss": 0.5976, + "step": 74070 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020515263157894735, + "loss": 0.5926, + "step": 74080 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002050736842105263, + "loss": 0.5866, + "step": 74090 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020499473684210524, + "loss": 0.5865, + "step": 74100 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002049157894736842, + "loss": 0.596, + "step": 74110 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020483684210526314, + "loss": 0.5871, + "step": 74120 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002047578947368421, + "loss": 0.5887, + "step": 74130 + }, + { + "epoch": 0.74, + "learning_rate": 0.000204678947368421, + "loss": 0.58, + "step": 74140 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020459999999999999, + "loss": 0.5958, + "step": 74150 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020452105263157893, + "loss": 0.5887, + "step": 74160 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020444210526315788, + "loss": 0.6021, + "step": 74170 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020436315789473683, + "loss": 0.5973, + "step": 74180 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020428421052631578, + "loss": 0.5917, + "step": 74190 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020420526315789473, + "loss": 0.5885, + "step": 74200 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020412631578947365, + "loss": 0.5923, + "step": 74210 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002040473684210526, + "loss": 0.5991, + "step": 74220 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020396842105263155, + "loss": 0.594, + "step": 74230 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020388947368421052, + "loss": 0.5846, + "step": 74240 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020381052631578947, + "loss": 0.5927, + "step": 74250 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020373157894736842, + "loss": 0.5902, + "step": 74260 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020365263157894737, + "loss": 0.5959, + "step": 74270 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002035736842105263, + "loss": 0.595, + "step": 74280 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020349473684210524, + "loss": 0.5999, + "step": 74290 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020341578947368418, + "loss": 0.5981, + "step": 74300 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020333684210526313, + "loss": 0.5802, + "step": 74310 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020325789473684208, + "loss": 0.5898, + "step": 74320 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020317894736842103, + "loss": 0.5907, + "step": 74330 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002031, + "loss": 0.5945, + "step": 74340 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020302105263157895, + "loss": 0.5891, + "step": 74350 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020294210526315787, + "loss": 0.5939, + "step": 74360 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020286315789473682, + "loss": 0.5917, + "step": 74370 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020278421052631577, + "loss": 0.6046, + "step": 74380 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020270526315789472, + "loss": 0.5886, + "step": 74390 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020262631578947367, + "loss": 0.5927, + "step": 74400 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020254736842105261, + "loss": 0.5799, + "step": 74410 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020246842105263154, + "loss": 0.5988, + "step": 74420 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002023894736842105, + "loss": 0.5747, + "step": 74430 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020231052631578946, + "loss": 0.6025, + "step": 74440 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002022315789473684, + "loss": 0.5961, + "step": 74450 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020215263157894736, + "loss": 0.6067, + "step": 74460 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002020736842105263, + "loss": 0.5855, + "step": 74470 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020199473684210525, + "loss": 0.603, + "step": 74480 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020191578947368417, + "loss": 0.5925, + "step": 74490 + }, + { + "epoch": 0.74, + "learning_rate": 0.00020183684210526312, + "loss": 0.5881, + "step": 74500 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020175789473684207, + "loss": 0.5895, + "step": 74510 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020167894736842105, + "loss": 0.5814, + "step": 74520 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002016, + "loss": 0.5903, + "step": 74530 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020152105263157894, + "loss": 0.5922, + "step": 74540 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002014421052631579, + "loss": 0.5976, + "step": 74550 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020136315789473684, + "loss": 0.5997, + "step": 74560 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020128421052631576, + "loss": 0.5949, + "step": 74570 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002012052631578947, + "loss": 0.5946, + "step": 74580 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020112631578947366, + "loss": 0.5893, + "step": 74590 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002010473684210526, + "loss": 0.5897, + "step": 74600 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020096842105263155, + "loss": 0.5852, + "step": 74610 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020088947368421053, + "loss": 0.5922, + "step": 74620 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020081052631578948, + "loss": 0.5941, + "step": 74630 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002007315789473684, + "loss": 0.5982, + "step": 74640 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020065263157894735, + "loss": 0.5922, + "step": 74650 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002005736842105263, + "loss": 0.5886, + "step": 74660 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020049473684210524, + "loss": 0.5966, + "step": 74670 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002004157894736842, + "loss": 0.5947, + "step": 74680 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020033684210526314, + "loss": 0.601, + "step": 74690 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020025789473684206, + "loss": 0.5969, + "step": 74700 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020017894736842104, + "loss": 0.593, + "step": 74710 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020009999999999998, + "loss": 0.5953, + "step": 74720 + }, + { + "epoch": 0.75, + "learning_rate": 0.00020002105263157893, + "loss": 0.5841, + "step": 74730 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019994210526315788, + "loss": 0.5884, + "step": 74740 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019986315789473683, + "loss": 0.5938, + "step": 74750 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019978421052631578, + "loss": 0.588, + "step": 74760 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001997052631578947, + "loss": 0.597, + "step": 74770 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019962631578947365, + "loss": 0.602, + "step": 74780 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001995473684210526, + "loss": 0.6009, + "step": 74790 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019946842105263157, + "loss": 0.5925, + "step": 74800 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019938947368421052, + "loss": 0.6012, + "step": 74810 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019931052631578947, + "loss": 0.6001, + "step": 74820 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019923157894736842, + "loss": 0.5994, + "step": 74830 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019915263157894736, + "loss": 0.5961, + "step": 74840 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019907368421052629, + "loss": 0.6031, + "step": 74850 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019899473684210523, + "loss": 0.5944, + "step": 74860 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019891578947368418, + "loss": 0.5957, + "step": 74870 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019883684210526313, + "loss": 0.5954, + "step": 74880 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019875789473684208, + "loss": 0.5925, + "step": 74890 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019867894736842105, + "loss": 0.5936, + "step": 74900 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001986, + "loss": 0.6004, + "step": 74910 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019852105263157892, + "loss": 0.5987, + "step": 74920 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019844210526315787, + "loss": 0.5971, + "step": 74930 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019836315789473682, + "loss": 0.5983, + "step": 74940 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019828421052631577, + "loss": 0.5889, + "step": 74950 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019820526315789472, + "loss": 0.5969, + "step": 74960 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019812631578947367, + "loss": 0.6017, + "step": 74970 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001980473684210526, + "loss": 0.5875, + "step": 74980 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001979684210526316, + "loss": 0.5982, + "step": 74990 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001978894736842105, + "loss": 0.5837, + "step": 75000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.8770046977807257, + "eval_loss": 0.56396484375, + "eval_runtime": 98.5907, + "eval_samples_per_second": 811.436, + "eval_steps_per_second": 1.592, + "step": 75000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019781052631578946, + "loss": 0.5881, + "step": 75010 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001977315789473684, + "loss": 0.5946, + "step": 75020 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019765263157894736, + "loss": 0.588, + "step": 75030 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001975736842105263, + "loss": 0.5886, + "step": 75040 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019749473684210525, + "loss": 0.5792, + "step": 75050 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019741578947368417, + "loss": 0.5923, + "step": 75060 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019733684210526312, + "loss": 0.6011, + "step": 75070 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001972578947368421, + "loss": 0.5931, + "step": 75080 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019717894736842105, + "loss": 0.5948, + "step": 75090 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001971, + "loss": 0.5909, + "step": 75100 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019702105263157894, + "loss": 0.5982, + "step": 75110 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001969421052631579, + "loss": 0.5944, + "step": 75120 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001968631578947368, + "loss": 0.5918, + "step": 75130 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019678421052631576, + "loss": 0.5985, + "step": 75140 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001967052631578947, + "loss": 0.5968, + "step": 75150 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019662631578947366, + "loss": 0.5939, + "step": 75160 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001965473684210526, + "loss": 0.5976, + "step": 75170 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019646842105263158, + "loss": 0.5988, + "step": 75180 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019638947368421053, + "loss": 0.6072, + "step": 75190 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019631052631578945, + "loss": 0.5909, + "step": 75200 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001962315789473684, + "loss": 0.5811, + "step": 75210 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019615263157894735, + "loss": 0.5932, + "step": 75220 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001960736842105263, + "loss": 0.5838, + "step": 75230 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019599473684210524, + "loss": 0.5789, + "step": 75240 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001959157894736842, + "loss": 0.5852, + "step": 75250 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001958368421052631, + "loss": 0.5922, + "step": 75260 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019575789473684211, + "loss": 0.5971, + "step": 75270 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019568684210526314, + "loss": 0.59, + "step": 75280 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019560789473684208, + "loss": 0.5934, + "step": 75290 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019552894736842103, + "loss": 0.5961, + "step": 75300 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019544999999999998, + "loss": 0.5928, + "step": 75310 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019537105263157893, + "loss": 0.5838, + "step": 75320 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001952921052631579, + "loss": 0.5978, + "step": 75330 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019521315789473683, + "loss": 0.5994, + "step": 75340 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019513421052631577, + "loss": 0.5816, + "step": 75350 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019505526315789472, + "loss": 0.6051, + "step": 75360 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019497631578947367, + "loss": 0.5852, + "step": 75370 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019489736842105262, + "loss": 0.5833, + "step": 75380 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019481842105263157, + "loss": 0.5901, + "step": 75390 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001947394736842105, + "loss": 0.5961, + "step": 75400 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019466052631578944, + "loss": 0.5979, + "step": 75410 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019458157894736838, + "loss": 0.5892, + "step": 75420 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019450263157894736, + "loss": 0.6031, + "step": 75430 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001944236842105263, + "loss": 0.592, + "step": 75440 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019434473684210526, + "loss": 0.6006, + "step": 75450 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001942657894736842, + "loss": 0.5834, + "step": 75460 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019418684210526315, + "loss": 0.5959, + "step": 75470 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019410789473684207, + "loss": 0.5937, + "step": 75480 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019402894736842102, + "loss": 0.5794, + "step": 75490 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019394999999999997, + "loss": 0.589, + "step": 75500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019387105263157892, + "loss": 0.5825, + "step": 75510 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001937921052631579, + "loss": 0.5892, + "step": 75520 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019371315789473684, + "loss": 0.5842, + "step": 75530 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001936342105263158, + "loss": 0.5992, + "step": 75540 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001935552631578947, + "loss": 0.5851, + "step": 75550 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019347631578947366, + "loss": 0.5842, + "step": 75560 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001933973684210526, + "loss": 0.5936, + "step": 75570 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019331842105263156, + "loss": 0.58, + "step": 75580 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001932394736842105, + "loss": 0.5877, + "step": 75590 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019316052631578945, + "loss": 0.5899, + "step": 75600 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019308157894736843, + "loss": 0.5859, + "step": 75610 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019300263157894738, + "loss": 0.5906, + "step": 75620 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001929236842105263, + "loss": 0.6013, + "step": 75630 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019284473684210525, + "loss": 0.5922, + "step": 75640 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001927657894736842, + "loss": 0.593, + "step": 75650 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019268684210526314, + "loss": 0.5846, + "step": 75660 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001926078947368421, + "loss": 0.6047, + "step": 75670 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019252894736842104, + "loss": 0.5999, + "step": 75680 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019244999999999996, + "loss": 0.5929, + "step": 75690 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001923710526315789, + "loss": 0.5971, + "step": 75700 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019229210526315789, + "loss": 0.5924, + "step": 75710 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019221315789473683, + "loss": 0.5932, + "step": 75720 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019213421052631578, + "loss": 0.5953, + "step": 75730 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019205526315789473, + "loss": 0.5933, + "step": 75740 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019197631578947368, + "loss": 0.5838, + "step": 75750 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001918973684210526, + "loss": 0.5949, + "step": 75760 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019181842105263155, + "loss": 0.583, + "step": 75770 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001917394736842105, + "loss": 0.5799, + "step": 75780 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019166052631578944, + "loss": 0.5821, + "step": 75790 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019158157894736842, + "loss": 0.582, + "step": 75800 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019150263157894737, + "loss": 0.5926, + "step": 75810 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019142368421052632, + "loss": 0.5814, + "step": 75820 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019134473684210524, + "loss": 0.5775, + "step": 75830 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019126578947368419, + "loss": 0.589, + "step": 75840 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019118684210526313, + "loss": 0.5776, + "step": 75850 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019110789473684208, + "loss": 0.5853, + "step": 75860 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019102894736842103, + "loss": 0.5883, + "step": 75870 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019094999999999998, + "loss": 0.5909, + "step": 75880 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019087105263157895, + "loss": 0.592, + "step": 75890 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001907921052631579, + "loss": 0.5967, + "step": 75900 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019071315789473682, + "loss": 0.5933, + "step": 75910 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019063421052631577, + "loss": 0.5856, + "step": 75920 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019055526315789472, + "loss": 0.5865, + "step": 75930 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019047631578947367, + "loss": 0.5849, + "step": 75940 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019039736842105262, + "loss": 0.5971, + "step": 75950 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019031842105263157, + "loss": 0.5906, + "step": 75960 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001902394736842105, + "loss": 0.5767, + "step": 75970 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019016052631578944, + "loss": 0.5784, + "step": 75980 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001900815789473684, + "loss": 0.5832, + "step": 75990 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019000263157894736, + "loss": 0.5943, + "step": 76000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001899236842105263, + "loss": 0.5917, + "step": 76010 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018984473684210526, + "loss": 0.596, + "step": 76020 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001897657894736842, + "loss": 0.588, + "step": 76030 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018968684210526313, + "loss": 0.6016, + "step": 76040 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018960789473684207, + "loss": 0.5879, + "step": 76050 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018952894736842102, + "loss": 0.5827, + "step": 76060 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018944999999999997, + "loss": 0.5805, + "step": 76070 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018937105263157895, + "loss": 0.5955, + "step": 76080 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001892921052631579, + "loss": 0.5977, + "step": 76090 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018921315789473684, + "loss": 0.5988, + "step": 76100 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001891342105263158, + "loss": 0.5972, + "step": 76110 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001890552631578947, + "loss": 0.5906, + "step": 76120 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018897631578947366, + "loss": 0.5982, + "step": 76130 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001888973684210526, + "loss": 0.5872, + "step": 76140 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018881842105263156, + "loss": 0.5906, + "step": 76150 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001887394736842105, + "loss": 0.6073, + "step": 76160 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018866052631578948, + "loss": 0.6051, + "step": 76170 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018858157894736843, + "loss": 0.5927, + "step": 76180 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018850263157894735, + "loss": 0.5896, + "step": 76190 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001884236842105263, + "loss": 0.586, + "step": 76200 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018834473684210525, + "loss": 0.5846, + "step": 76210 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001882657894736842, + "loss": 0.5976, + "step": 76220 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018818684210526314, + "loss": 0.5955, + "step": 76230 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001881078947368421, + "loss": 0.5932, + "step": 76240 + }, + { + "epoch": 0.76, + "learning_rate": 0.000188028947368421, + "loss": 0.593, + "step": 76250 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018794999999999996, + "loss": 0.5816, + "step": 76260 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018787105263157894, + "loss": 0.5801, + "step": 76270 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018779999999999998, + "loss": 0.5974, + "step": 76280 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018772105263157893, + "loss": 0.5982, + "step": 76290 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018764210526315788, + "loss": 0.5994, + "step": 76300 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001875631578947368, + "loss": 0.5968, + "step": 76310 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018748421052631575, + "loss": 0.6025, + "step": 76320 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018740526315789473, + "loss": 0.5866, + "step": 76330 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018732631578947367, + "loss": 0.5938, + "step": 76340 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018724736842105262, + "loss": 0.5912, + "step": 76350 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018716842105263157, + "loss": 0.5797, + "step": 76360 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018708947368421052, + "loss": 0.6013, + "step": 76370 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018701052631578947, + "loss": 0.5921, + "step": 76380 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001869315789473684, + "loss": 0.5994, + "step": 76390 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018685263157894734, + "loss": 0.5708, + "step": 76400 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018677368421052628, + "loss": 0.5767, + "step": 76410 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018669473684210523, + "loss": 0.5678, + "step": 76420 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001866157894736842, + "loss": 0.5777, + "step": 76430 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018653684210526316, + "loss": 0.5798, + "step": 76440 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001864578947368421, + "loss": 0.5749, + "step": 76450 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018637894736842103, + "loss": 0.5691, + "step": 76460 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018629999999999997, + "loss": 0.5552, + "step": 76470 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018622105263157892, + "loss": 0.5727, + "step": 76480 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018614210526315787, + "loss": 0.5828, + "step": 76490 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018606315789473682, + "loss": 0.5758, + "step": 76500 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018598421052631577, + "loss": 0.5768, + "step": 76510 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018590526315789474, + "loss": 0.5644, + "step": 76520 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001858263157894737, + "loss": 0.5651, + "step": 76530 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001857473684210526, + "loss": 0.5793, + "step": 76540 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018566842105263156, + "loss": 0.5897, + "step": 76550 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001855894736842105, + "loss": 0.5769, + "step": 76560 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018551052631578946, + "loss": 0.5866, + "step": 76570 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001854315789473684, + "loss": 0.5854, + "step": 76580 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018535263157894735, + "loss": 0.5828, + "step": 76590 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018527368421052628, + "loss": 0.581, + "step": 76600 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018519473684210525, + "loss": 0.57, + "step": 76610 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001851157894736842, + "loss": 0.5655, + "step": 76620 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018503684210526315, + "loss": 0.5806, + "step": 76630 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001849578947368421, + "loss": 0.5757, + "step": 76640 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018488684210526314, + "loss": 0.5872, + "step": 76650 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018480789473684207, + "loss": 0.5703, + "step": 76660 + }, + { + "epoch": 0.77, + "learning_rate": 0.000184728947368421, + "loss": 0.5723, + "step": 76670 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018465, + "loss": 0.5638, + "step": 76680 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018457105263157894, + "loss": 0.5819, + "step": 76690 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018449210526315789, + "loss": 0.5739, + "step": 76700 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018441315789473683, + "loss": 0.5862, + "step": 76710 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018433421052631578, + "loss": 0.5625, + "step": 76720 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018425526315789473, + "loss": 0.5555, + "step": 76730 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018417631578947365, + "loss": 0.5683, + "step": 76740 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001840973684210526, + "loss": 0.5669, + "step": 76750 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018401842105263155, + "loss": 0.5604, + "step": 76760 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018393947368421052, + "loss": 0.5773, + "step": 76770 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018386052631578947, + "loss": 0.5661, + "step": 76780 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018378157894736842, + "loss": 0.5645, + "step": 76790 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018370263157894737, + "loss": 0.5782, + "step": 76800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001836236842105263, + "loss": 0.5892, + "step": 76810 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018354473684210524, + "loss": 0.5789, + "step": 76820 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018346578947368419, + "loss": 0.5755, + "step": 76830 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018338684210526313, + "loss": 0.5721, + "step": 76840 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018330789473684208, + "loss": 0.5785, + "step": 76850 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018322894736842106, + "loss": 0.5665, + "step": 76860 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018315, + "loss": 0.567, + "step": 76870 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018307105263157893, + "loss": 0.5833, + "step": 76880 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018299210526315788, + "loss": 0.596, + "step": 76890 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018291315789473682, + "loss": 0.5872, + "step": 76900 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018283421052631577, + "loss": 0.5898, + "step": 76910 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018275526315789472, + "loss": 0.5931, + "step": 76920 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018267631578947367, + "loss": 0.592, + "step": 76930 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001825973684210526, + "loss": 0.5917, + "step": 76940 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018251842105263154, + "loss": 0.5895, + "step": 76950 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018243947368421051, + "loss": 0.5849, + "step": 76960 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018236052631578946, + "loss": 0.5931, + "step": 76970 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001822815789473684, + "loss": 0.5846, + "step": 76980 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018220263157894736, + "loss": 0.5949, + "step": 76990 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001821236842105263, + "loss": 0.5936, + "step": 77000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018204473684210526, + "loss": 0.5891, + "step": 77010 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018196578947368418, + "loss": 0.5882, + "step": 77020 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018188684210526313, + "loss": 0.5777, + "step": 77030 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018180789473684207, + "loss": 0.5873, + "step": 77040 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018172894736842105, + "loss": 0.5893, + "step": 77050 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018165, + "loss": 0.5855, + "step": 77060 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018157105263157895, + "loss": 0.5977, + "step": 77070 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001814921052631579, + "loss": 0.6028, + "step": 77080 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018141315789473682, + "loss": 0.5925, + "step": 77090 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018133421052631576, + "loss": 0.5958, + "step": 77100 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001812552631578947, + "loss": 0.5835, + "step": 77110 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018117631578947366, + "loss": 0.5854, + "step": 77120 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001810973684210526, + "loss": 0.5894, + "step": 77130 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018101842105263158, + "loss": 0.5928, + "step": 77140 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018093947368421053, + "loss": 0.5896, + "step": 77150 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018086052631578948, + "loss": 0.5805, + "step": 77160 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001807815789473684, + "loss": 0.5839, + "step": 77170 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018070263157894735, + "loss": 0.5863, + "step": 77180 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001806236842105263, + "loss": 0.5859, + "step": 77190 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018054473684210525, + "loss": 0.5914, + "step": 77200 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001804657894736842, + "loss": 0.5905, + "step": 77210 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018038684210526314, + "loss": 0.5948, + "step": 77220 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018030789473684206, + "loss": 0.5882, + "step": 77230 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018022894736842104, + "loss": 0.59, + "step": 77240 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018015, + "loss": 0.5804, + "step": 77250 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018007105263157894, + "loss": 0.5853, + "step": 77260 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017999210526315788, + "loss": 0.5858, + "step": 77270 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017991315789473683, + "loss": 0.6019, + "step": 77280 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017983421052631578, + "loss": 0.599, + "step": 77290 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001797552631578947, + "loss": 0.5872, + "step": 77300 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017967631578947365, + "loss": 0.5993, + "step": 77310 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001795973684210526, + "loss": 0.6082, + "step": 77320 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017951842105263157, + "loss": 0.5967, + "step": 77330 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017943947368421052, + "loss": 0.5995, + "step": 77340 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017936052631578947, + "loss": 0.5943, + "step": 77350 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017928157894736842, + "loss": 0.5915, + "step": 77360 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017920263157894734, + "loss": 0.5712, + "step": 77370 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001791236842105263, + "loss": 0.5805, + "step": 77380 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017904473684210524, + "loss": 0.5757, + "step": 77390 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017896578947368419, + "loss": 0.5926, + "step": 77400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017888684210526313, + "loss": 0.5889, + "step": 77410 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001788078947368421, + "loss": 0.5812, + "step": 77420 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017872894736842106, + "loss": 0.5989, + "step": 77430 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017865, + "loss": 0.5755, + "step": 77440 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017857105263157893, + "loss": 0.5941, + "step": 77450 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017849210526315788, + "loss": 0.5814, + "step": 77460 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017841315789473682, + "loss": 0.5839, + "step": 77470 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017833421052631577, + "loss": 0.5837, + "step": 77480 + }, + { + "epoch": 0.77, + "learning_rate": 0.00017825526315789472, + "loss": 0.5764, + "step": 77490 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017817631578947367, + "loss": 0.5866, + "step": 77500 + }, + { + "epoch": 0.78, + "eval_accuracy": 0.8773601876392698, + "eval_loss": 0.5615234375, + "eval_runtime": 98.3407, + "eval_samples_per_second": 813.499, + "eval_steps_per_second": 1.596, + "step": 77500 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001780973684210526, + "loss": 0.5687, + "step": 77510 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017801842105263156, + "loss": 0.5813, + "step": 77520 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001779394736842105, + "loss": 0.5739, + "step": 77530 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017786052631578946, + "loss": 0.5825, + "step": 77540 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001777815789473684, + "loss": 0.5699, + "step": 77550 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017770263157894736, + "loss": 0.5796, + "step": 77560 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001776236842105263, + "loss": 0.571, + "step": 77570 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017754473684210523, + "loss": 0.5849, + "step": 77580 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017746578947368418, + "loss": 0.5826, + "step": 77590 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017738684210526312, + "loss": 0.6047, + "step": 77600 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001773078947368421, + "loss": 0.5951, + "step": 77610 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017722894736842105, + "loss": 0.5876, + "step": 77620 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017715, + "loss": 0.5915, + "step": 77630 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017707105263157894, + "loss": 0.5894, + "step": 77640 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001769921052631579, + "loss": 0.5891, + "step": 77650 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017691315789473681, + "loss": 0.5863, + "step": 77660 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017683421052631576, + "loss": 0.5891, + "step": 77670 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001767552631578947, + "loss": 0.5976, + "step": 77680 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017667631578947366, + "loss": 0.5871, + "step": 77690 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017659736842105263, + "loss": 0.596, + "step": 77700 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017651842105263158, + "loss": 0.592, + "step": 77710 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017643947368421053, + "loss": 0.6012, + "step": 77720 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017636052631578945, + "loss": 0.5855, + "step": 77730 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001762815789473684, + "loss": 0.5898, + "step": 77740 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017620263157894735, + "loss": 0.6017, + "step": 77750 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001761236842105263, + "loss": 0.5926, + "step": 77760 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017604473684210525, + "loss": 0.591, + "step": 77770 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001759657894736842, + "loss": 0.596, + "step": 77780 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017588684210526311, + "loss": 0.5855, + "step": 77790 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001758078947368421, + "loss": 0.5889, + "step": 77800 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017572894736842104, + "loss": 0.5921, + "step": 77810 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017565, + "loss": 0.5874, + "step": 77820 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017557105263157894, + "loss": 0.5985, + "step": 77830 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017549210526315788, + "loss": 0.5736, + "step": 77840 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017541315789473683, + "loss": 0.5707, + "step": 77850 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017533421052631575, + "loss": 0.5861, + "step": 77860 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001752552631578947, + "loss": 0.5843, + "step": 77870 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017517631578947365, + "loss": 0.5825, + "step": 77880 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017509736842105263, + "loss": 0.5816, + "step": 77890 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017501842105263157, + "loss": 0.5691, + "step": 77900 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017493947368421052, + "loss": 0.5769, + "step": 77910 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017486052631578947, + "loss": 0.5875, + "step": 77920 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017478157894736842, + "loss": 0.5881, + "step": 77930 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017470263157894734, + "loss": 0.5793, + "step": 77940 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001746236842105263, + "loss": 0.5803, + "step": 77950 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017454473684210524, + "loss": 0.5789, + "step": 77960 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017446578947368418, + "loss": 0.5912, + "step": 77970 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017438684210526316, + "loss": 0.5939, + "step": 77980 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001743078947368421, + "loss": 0.5964, + "step": 77990 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017422894736842106, + "loss": 0.5815, + "step": 78000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017414999999999998, + "loss": 0.5832, + "step": 78010 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017407105263157893, + "loss": 0.5843, + "step": 78020 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017399210526315787, + "loss": 0.5763, + "step": 78030 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017391315789473682, + "loss": 0.5813, + "step": 78040 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017383421052631577, + "loss": 0.5774, + "step": 78050 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017375526315789472, + "loss": 0.5784, + "step": 78060 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001736763157894737, + "loss": 0.5669, + "step": 78070 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017359736842105264, + "loss": 0.5702, + "step": 78080 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017351842105263156, + "loss": 0.5785, + "step": 78090 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001734394736842105, + "loss": 0.5729, + "step": 78100 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017336052631578946, + "loss": 0.5699, + "step": 78110 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001732815789473684, + "loss": 0.578, + "step": 78120 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017320263157894736, + "loss": 0.5723, + "step": 78130 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001731236842105263, + "loss": 0.5908, + "step": 78140 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017304473684210523, + "loss": 0.5829, + "step": 78150 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017296578947368417, + "loss": 0.59, + "step": 78160 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017288684210526315, + "loss": 0.5888, + "step": 78170 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001728078947368421, + "loss": 0.5903, + "step": 78180 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017272894736842105, + "loss": 0.5908, + "step": 78190 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017265, + "loss": 0.5988, + "step": 78200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017257105263157894, + "loss": 0.5943, + "step": 78210 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017249210526315786, + "loss": 0.5881, + "step": 78220 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001724131578947368, + "loss": 0.5965, + "step": 78230 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017233421052631576, + "loss": 0.598, + "step": 78240 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001722552631578947, + "loss": 0.5954, + "step": 78250 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017217631578947369, + "loss": 0.6063, + "step": 78260 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017209736842105263, + "loss": 0.607, + "step": 78270 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017201842105263158, + "loss": 0.5848, + "step": 78280 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001719394736842105, + "loss": 0.5862, + "step": 78290 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017186052631578945, + "loss": 0.5925, + "step": 78300 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001717815789473684, + "loss": 0.5859, + "step": 78310 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017170263157894735, + "loss": 0.5928, + "step": 78320 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001716236842105263, + "loss": 0.5863, + "step": 78330 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017154473684210524, + "loss": 0.5849, + "step": 78340 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017146578947368422, + "loss": 0.5821, + "step": 78350 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017138684210526317, + "loss": 0.5849, + "step": 78360 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001713078947368421, + "loss": 0.5799, + "step": 78370 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017122894736842104, + "loss": 0.5936, + "step": 78380 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017114999999999999, + "loss": 0.5874, + "step": 78390 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017107105263157893, + "loss": 0.5931, + "step": 78400 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017099210526315788, + "loss": 0.5942, + "step": 78410 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017091315789473683, + "loss": 0.5835, + "step": 78420 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017083421052631575, + "loss": 0.585, + "step": 78430 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001707552631578947, + "loss": 0.5985, + "step": 78440 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017067631578947368, + "loss": 0.5995, + "step": 78450 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017059736842105262, + "loss": 0.5992, + "step": 78460 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017051842105263157, + "loss": 0.5808, + "step": 78470 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017043947368421052, + "loss": 0.5961, + "step": 78480 + }, + { + "epoch": 0.78, + "learning_rate": 0.00017036052631578947, + "loss": 0.5751, + "step": 78490 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001702815789473684, + "loss": 0.5802, + "step": 78500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00017020263157894734, + "loss": 0.5793, + "step": 78510 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001701236842105263, + "loss": 0.5904, + "step": 78520 + }, + { + "epoch": 0.79, + "learning_rate": 0.00017004473684210524, + "loss": 0.5702, + "step": 78530 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001699657894736842, + "loss": 0.5715, + "step": 78540 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016988684210526316, + "loss": 0.5723, + "step": 78550 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001698078947368421, + "loss": 0.5778, + "step": 78560 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016973684210526313, + "loss": 0.5969, + "step": 78570 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016965789473684208, + "loss": 0.6002, + "step": 78580 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016957894736842102, + "loss": 0.5946, + "step": 78590 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016949999999999997, + "loss": 0.5937, + "step": 78600 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016942105263157895, + "loss": 0.5941, + "step": 78610 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001693421052631579, + "loss": 0.611, + "step": 78620 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016926315789473684, + "loss": 0.5932, + "step": 78630 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016918421052631577, + "loss": 0.6037, + "step": 78640 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016910526315789471, + "loss": 0.596, + "step": 78650 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016902631578947366, + "loss": 0.5977, + "step": 78660 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001689473684210526, + "loss": 0.6065, + "step": 78670 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016886842105263156, + "loss": 0.6014, + "step": 78680 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001687894736842105, + "loss": 0.5962, + "step": 78690 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016871052631578948, + "loss": 0.5967, + "step": 78700 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001686315789473684, + "loss": 0.5962, + "step": 78710 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016855263157894735, + "loss": 0.5877, + "step": 78720 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001684736842105263, + "loss": 0.5971, + "step": 78730 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016839473684210525, + "loss": 0.5903, + "step": 78740 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001683157894736842, + "loss": 0.5888, + "step": 78750 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016823684210526315, + "loss": 0.5827, + "step": 78760 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016815789473684207, + "loss": 0.581, + "step": 78770 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016807894736842102, + "loss": 0.574, + "step": 78780 + }, + { + "epoch": 0.79, + "learning_rate": 0.000168, + "loss": 0.5862, + "step": 78790 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016792105263157894, + "loss": 0.5846, + "step": 78800 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001678421052631579, + "loss": 0.595, + "step": 78810 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016776315789473684, + "loss": 0.5791, + "step": 78820 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016768421052631578, + "loss": 0.5823, + "step": 78830 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016760526315789473, + "loss": 0.5918, + "step": 78840 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016752631578947365, + "loss": 0.5902, + "step": 78850 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001674473684210526, + "loss": 0.5868, + "step": 78860 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016736842105263155, + "loss": 0.5845, + "step": 78870 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001672894736842105, + "loss": 0.5821, + "step": 78880 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016721052631578947, + "loss": 0.5929, + "step": 78890 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016713157894736842, + "loss": 0.5958, + "step": 78900 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016705263157894737, + "loss": 0.5871, + "step": 78910 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001669736842105263, + "loss": 0.5838, + "step": 78920 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016689473684210524, + "loss": 0.5899, + "step": 78930 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001668157894736842, + "loss": 0.5921, + "step": 78940 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016673684210526314, + "loss": 0.5951, + "step": 78950 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016665789473684208, + "loss": 0.5961, + "step": 78960 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016657894736842103, + "loss": 0.5973, + "step": 78970 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001665, + "loss": 0.5763, + "step": 78980 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016642105263157896, + "loss": 0.5862, + "step": 78990 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016634210526315788, + "loss": 0.592, + "step": 79000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016626315789473683, + "loss": 0.5803, + "step": 79010 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016618421052631577, + "loss": 0.5817, + "step": 79020 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016610526315789472, + "loss": 0.5859, + "step": 79030 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016602631578947367, + "loss": 0.5815, + "step": 79040 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016594736842105262, + "loss": 0.5921, + "step": 79050 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016586842105263154, + "loss": 0.5914, + "step": 79060 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016578947368421052, + "loss": 0.58, + "step": 79070 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016571052631578946, + "loss": 0.5861, + "step": 79080 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001656315789473684, + "loss": 0.5866, + "step": 79090 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016555263157894736, + "loss": 0.5887, + "step": 79100 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001654736842105263, + "loss": 0.5906, + "step": 79110 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016539473684210526, + "loss": 0.5837, + "step": 79120 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016531578947368418, + "loss": 0.585, + "step": 79130 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016523684210526313, + "loss": 0.5758, + "step": 79140 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016515789473684208, + "loss": 0.5805, + "step": 79150 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016507894736842102, + "loss": 0.5879, + "step": 79160 + }, + { + "epoch": 0.79, + "learning_rate": 0.000165, + "loss": 0.5835, + "step": 79170 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016492105263157895, + "loss": 0.5816, + "step": 79180 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001648421052631579, + "loss": 0.5812, + "step": 79190 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016476315789473682, + "loss": 0.5868, + "step": 79200 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016468421052631577, + "loss": 0.5846, + "step": 79210 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001646052631578947, + "loss": 0.6016, + "step": 79220 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016452631578947366, + "loss": 0.5815, + "step": 79230 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001644473684210526, + "loss": 0.5742, + "step": 79240 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016436842105263156, + "loss": 0.5834, + "step": 79250 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016428947368421053, + "loss": 0.5736, + "step": 79260 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016421052631578948, + "loss": 0.5911, + "step": 79270 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001641315789473684, + "loss": 0.5774, + "step": 79280 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016405263157894735, + "loss": 0.5825, + "step": 79290 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001639736842105263, + "loss": 0.5693, + "step": 79300 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016389473684210525, + "loss": 0.561, + "step": 79310 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001638157894736842, + "loss": 0.573, + "step": 79320 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016373684210526314, + "loss": 0.5787, + "step": 79330 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016365789473684207, + "loss": 0.5856, + "step": 79340 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016357894736842104, + "loss": 0.5764, + "step": 79350 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001635, + "loss": 0.569, + "step": 79360 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016342105263157894, + "loss": 0.5721, + "step": 79370 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016334210526315789, + "loss": 0.5951, + "step": 79380 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016326315789473683, + "loss": 0.5956, + "step": 79390 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016318421052631578, + "loss": 0.5996, + "step": 79400 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001631052631578947, + "loss": 0.5883, + "step": 79410 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016302631578947365, + "loss": 0.5969, + "step": 79420 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001629473684210526, + "loss": 0.5874, + "step": 79430 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016286842105263155, + "loss": 0.589, + "step": 79440 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016278947368421052, + "loss": 0.588, + "step": 79450 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016271052631578947, + "loss": 0.5926, + "step": 79460 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016263157894736842, + "loss": 0.5965, + "step": 79470 + }, + { + "epoch": 0.79, + "learning_rate": 0.00016255263157894737, + "loss": 0.5897, + "step": 79480 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001624736842105263, + "loss": 0.5836, + "step": 79490 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016239473684210524, + "loss": 0.58, + "step": 79500 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001623157894736842, + "loss": 0.581, + "step": 79510 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016223684210526314, + "loss": 0.5848, + "step": 79520 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016215789473684208, + "loss": 0.5796, + "step": 79530 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016207894736842106, + "loss": 0.5901, + "step": 79540 + }, + { + "epoch": 0.8, + "learning_rate": 0.000162, + "loss": 0.5849, + "step": 79550 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016192105263157893, + "loss": 0.5809, + "step": 79560 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016184210526315788, + "loss": 0.5734, + "step": 79570 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016176315789473683, + "loss": 0.5786, + "step": 79580 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016168421052631577, + "loss": 0.5781, + "step": 79590 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016160526315789472, + "loss": 0.5922, + "step": 79600 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016152631578947367, + "loss": 0.5848, + "step": 79610 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001614473684210526, + "loss": 0.5804, + "step": 79620 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016136842105263157, + "loss": 0.5825, + "step": 79630 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016128947368421052, + "loss": 0.5732, + "step": 79640 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016121052631578946, + "loss": 0.5813, + "step": 79650 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001611315789473684, + "loss": 0.5909, + "step": 79660 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016105263157894736, + "loss": 0.5927, + "step": 79670 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001609736842105263, + "loss": 0.5889, + "step": 79680 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016089473684210523, + "loss": 0.5828, + "step": 79690 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016081578947368418, + "loss": 0.595, + "step": 79700 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016073684210526313, + "loss": 0.5848, + "step": 79710 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001606578947368421, + "loss": 0.5866, + "step": 79720 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016057894736842105, + "loss": 0.5735, + "step": 79730 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001605, + "loss": 0.5765, + "step": 79740 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016042105263157895, + "loss": 0.5838, + "step": 79750 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001603421052631579, + "loss": 0.5802, + "step": 79760 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016026315789473682, + "loss": 0.5899, + "step": 79770 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016018421052631576, + "loss": 0.5889, + "step": 79780 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001601052631578947, + "loss": 0.5812, + "step": 79790 + }, + { + "epoch": 0.8, + "learning_rate": 0.00016002631578947366, + "loss": 0.5699, + "step": 79800 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001599473684210526, + "loss": 0.5774, + "step": 79810 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015986842105263158, + "loss": 0.5809, + "step": 79820 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015978947368421053, + "loss": 0.5825, + "step": 79830 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015971052631578945, + "loss": 0.5816, + "step": 79840 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001596315789473684, + "loss": 0.5878, + "step": 79850 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015955263157894735, + "loss": 0.586, + "step": 79860 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001594736842105263, + "loss": 0.5965, + "step": 79870 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015939473684210525, + "loss": 0.5867, + "step": 79880 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001593157894736842, + "loss": 0.5934, + "step": 79890 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015923684210526312, + "loss": 0.5853, + "step": 79900 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015915789473684212, + "loss": 0.5937, + "step": 79910 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015907894736842104, + "loss": 0.5861, + "step": 79920 + }, + { + "epoch": 0.8, + "learning_rate": 0.000159, + "loss": 0.5847, + "step": 79930 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015892105263157894, + "loss": 0.5819, + "step": 79940 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015884210526315789, + "loss": 0.5847, + "step": 79950 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015876315789473683, + "loss": 0.5844, + "step": 79960 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015868421052631578, + "loss": 0.5868, + "step": 79970 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001586052631578947, + "loss": 0.5931, + "step": 79980 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015852631578947365, + "loss": 0.5832, + "step": 79990 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015844736842105263, + "loss": 0.5835, + "step": 80000 + }, + { + "epoch": 0.8, + "eval_accuracy": 0.8784877003135946, + "eval_loss": 0.55615234375, + "eval_runtime": 99.7857, + "eval_samples_per_second": 801.718, + "eval_steps_per_second": 1.573, + "step": 80000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015836842105263158, + "loss": 0.5931, + "step": 80010 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015828947368421052, + "loss": 0.5987, + "step": 80020 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015821052631578947, + "loss": 0.5891, + "step": 80030 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015813157894736842, + "loss": 0.5803, + "step": 80040 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015805263157894734, + "loss": 0.5794, + "step": 80050 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001579736842105263, + "loss": 0.5788, + "step": 80060 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015789473684210524, + "loss": 0.5733, + "step": 80070 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015781578947368419, + "loss": 0.5641, + "step": 80080 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015773684210526313, + "loss": 0.5627, + "step": 80090 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001576578947368421, + "loss": 0.5655, + "step": 80100 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015757894736842106, + "loss": 0.5681, + "step": 80110 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015749999999999998, + "loss": 0.5754, + "step": 80120 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015742105263157893, + "loss": 0.5619, + "step": 80130 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015734210526315788, + "loss": 0.5702, + "step": 80140 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015726315789473682, + "loss": 0.574, + "step": 80150 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015718421052631577, + "loss": 0.5703, + "step": 80160 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015710526315789472, + "loss": 0.5702, + "step": 80170 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015702631578947364, + "loss": 0.5722, + "step": 80180 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015694736842105264, + "loss": 0.5736, + "step": 80190 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015686842105263157, + "loss": 0.5659, + "step": 80200 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015678947368421051, + "loss": 0.5753, + "step": 80210 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015671052631578946, + "loss": 0.5607, + "step": 80220 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001566315789473684, + "loss": 0.5664, + "step": 80230 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015655263157894736, + "loss": 0.5535, + "step": 80240 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001564736842105263, + "loss": 0.5607, + "step": 80250 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015639473684210523, + "loss": 0.5691, + "step": 80260 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015631578947368418, + "loss": 0.563, + "step": 80270 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015623684210526315, + "loss": 0.56, + "step": 80280 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001561578947368421, + "loss": 0.5626, + "step": 80290 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015607894736842105, + "loss": 0.5638, + "step": 80300 + }, + { + "epoch": 0.8, + "learning_rate": 0.000156, + "loss": 0.5645, + "step": 80310 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015592105263157895, + "loss": 0.5762, + "step": 80320 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015584210526315787, + "loss": 0.5436, + "step": 80330 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015576315789473681, + "loss": 0.5765, + "step": 80340 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015568421052631576, + "loss": 0.5858, + "step": 80350 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001556052631578947, + "loss": 0.5911, + "step": 80360 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015552631578947366, + "loss": 0.5866, + "step": 80370 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015544736842105264, + "loss": 0.5814, + "step": 80380 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015536842105263158, + "loss": 0.583, + "step": 80390 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015528947368421053, + "loss": 0.5753, + "step": 80400 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015521052631578945, + "loss": 0.595, + "step": 80410 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001551315789473684, + "loss": 0.5942, + "step": 80420 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015505263157894735, + "loss": 0.5923, + "step": 80430 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001549736842105263, + "loss": 0.5812, + "step": 80440 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015489473684210525, + "loss": 0.5761, + "step": 80450 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001548157894736842, + "loss": 0.5892, + "step": 80460 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015473684210526317, + "loss": 0.583, + "step": 80470 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001546578947368421, + "loss": 0.5818, + "step": 80480 + }, + { + "epoch": 0.8, + "learning_rate": 0.00015457894736842104, + "loss": 0.5782, + "step": 80490 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001545, + "loss": 0.5796, + "step": 80500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015442105263157894, + "loss": 0.5846, + "step": 80510 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015434210526315788, + "loss": 0.5836, + "step": 80520 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015426315789473683, + "loss": 0.5852, + "step": 80530 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015418421052631575, + "loss": 0.5989, + "step": 80540 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001541052631578947, + "loss": 0.5874, + "step": 80550 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015402631578947368, + "loss": 0.5817, + "step": 80560 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015394736842105263, + "loss": 0.5643, + "step": 80570 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015387631578947367, + "loss": 0.5793, + "step": 80580 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015379736842105262, + "loss": 0.5642, + "step": 80590 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015371842105263157, + "loss": 0.5814, + "step": 80600 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001536394736842105, + "loss": 0.5886, + "step": 80610 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015356052631578944, + "loss": 0.5893, + "step": 80620 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015348157894736842, + "loss": 0.5857, + "step": 80630 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015340263157894736, + "loss": 0.5738, + "step": 80640 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001533236842105263, + "loss": 0.5745, + "step": 80650 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015324473684210526, + "loss": 0.5835, + "step": 80660 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001531657894736842, + "loss": 0.5711, + "step": 80670 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015308684210526313, + "loss": 0.5938, + "step": 80680 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015300789473684208, + "loss": 0.5784, + "step": 80690 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015292894736842103, + "loss": 0.5649, + "step": 80700 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015284999999999997, + "loss": 0.5775, + "step": 80710 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015277105263157895, + "loss": 0.5788, + "step": 80720 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001526921052631579, + "loss": 0.5825, + "step": 80730 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015261315789473685, + "loss": 0.5768, + "step": 80740 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015253421052631577, + "loss": 0.5765, + "step": 80750 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015245526315789472, + "loss": 0.5839, + "step": 80760 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015237631578947366, + "loss": 0.5694, + "step": 80770 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001522973684210526, + "loss": 0.5798, + "step": 80780 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015221842105263156, + "loss": 0.5916, + "step": 80790 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001521394736842105, + "loss": 0.5865, + "step": 80800 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015206052631578943, + "loss": 0.585, + "step": 80810 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015198157894736843, + "loss": 0.5772, + "step": 80820 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015190263157894735, + "loss": 0.5854, + "step": 80830 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001518236842105263, + "loss": 0.5884, + "step": 80840 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015174473684210525, + "loss": 0.5866, + "step": 80850 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001516657894736842, + "loss": 0.5776, + "step": 80860 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015158684210526315, + "loss": 0.5877, + "step": 80870 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001515078947368421, + "loss": 0.5977, + "step": 80880 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015142894736842102, + "loss": 0.5751, + "step": 80890 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015134999999999997, + "loss": 0.5772, + "step": 80900 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015127105263157894, + "loss": 0.5785, + "step": 80910 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001511921052631579, + "loss": 0.5769, + "step": 80920 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015111315789473684, + "loss": 0.5816, + "step": 80930 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015103421052631579, + "loss": 0.5778, + "step": 80940 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015095526315789473, + "loss": 0.5827, + "step": 80950 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015087631578947366, + "loss": 0.5702, + "step": 80960 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001507973684210526, + "loss": 0.5725, + "step": 80970 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015071842105263155, + "loss": 0.5662, + "step": 80980 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001506394736842105, + "loss": 0.5715, + "step": 80990 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015056052631578948, + "loss": 0.571, + "step": 81000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015048157894736842, + "loss": 0.5752, + "step": 81010 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015040263157894737, + "loss": 0.5633, + "step": 81020 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015032368421052632, + "loss": 0.5713, + "step": 81030 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015024473684210524, + "loss": 0.5765, + "step": 81040 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001501657894736842, + "loss": 0.5673, + "step": 81050 + }, + { + "epoch": 0.81, + "learning_rate": 0.00015008684210526314, + "loss": 0.5729, + "step": 81060 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001500078947368421, + "loss": 0.5759, + "step": 81070 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014992894736842103, + "loss": 0.5741, + "step": 81080 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014984999999999998, + "loss": 0.5722, + "step": 81090 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014977105263157893, + "loss": 0.5785, + "step": 81100 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014969210526315788, + "loss": 0.5802, + "step": 81110 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014961315789473683, + "loss": 0.5773, + "step": 81120 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014953421052631578, + "loss": 0.5748, + "step": 81130 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014945526315789472, + "loss": 0.5515, + "step": 81140 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014937631578947367, + "loss": 0.5761, + "step": 81150 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014929736842105262, + "loss": 0.5603, + "step": 81160 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014921842105263157, + "loss": 0.5695, + "step": 81170 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014913947368421052, + "loss": 0.5781, + "step": 81180 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014906052631578947, + "loss": 0.5679, + "step": 81190 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014898157894736841, + "loss": 0.5697, + "step": 81200 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014890263157894736, + "loss": 0.5576, + "step": 81210 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001488236842105263, + "loss": 0.5711, + "step": 81220 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014874473684210526, + "loss": 0.5682, + "step": 81230 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014866578947368418, + "loss": 0.5714, + "step": 81240 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014858684210526313, + "loss": 0.5631, + "step": 81250 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001485078947368421, + "loss": 0.5921, + "step": 81260 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014842894736842105, + "loss": 0.5783, + "step": 81270 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014834999999999997, + "loss": 0.5733, + "step": 81280 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014827105263157892, + "loss": 0.5809, + "step": 81290 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001481921052631579, + "loss": 0.5854, + "step": 81300 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014811315789473685, + "loss": 0.5747, + "step": 81310 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014803421052631577, + "loss": 0.5947, + "step": 81320 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014795526315789472, + "loss": 0.5814, + "step": 81330 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014787631578947366, + "loss": 0.5882, + "step": 81340 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001477973684210526, + "loss": 0.574, + "step": 81350 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014771842105263156, + "loss": 0.5781, + "step": 81360 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001476394736842105, + "loss": 0.5737, + "step": 81370 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014756052631578946, + "loss": 0.577, + "step": 81380 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001474815789473684, + "loss": 0.5697, + "step": 81390 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014740263157894735, + "loss": 0.5641, + "step": 81400 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001473236842105263, + "loss": 0.5671, + "step": 81410 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014724473684210525, + "loss": 0.5739, + "step": 81420 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001471657894736842, + "loss": 0.5815, + "step": 81430 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014708684210526315, + "loss": 0.5678, + "step": 81440 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001470078947368421, + "loss": 0.5733, + "step": 81450 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014692894736842104, + "loss": 0.5691, + "step": 81460 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014685, + "loss": 0.5652, + "step": 81470 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014677105263157894, + "loss": 0.578, + "step": 81480 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001466921052631579, + "loss": 0.571, + "step": 81490 + }, + { + "epoch": 0.81, + "learning_rate": 0.00014661315789473684, + "loss": 0.5736, + "step": 81500 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014653421052631578, + "loss": 0.5844, + "step": 81510 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014645526315789473, + "loss": 0.5876, + "step": 81520 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014637631578947365, + "loss": 0.5787, + "step": 81530 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014629736842105263, + "loss": 0.5773, + "step": 81540 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014621842105263158, + "loss": 0.5797, + "step": 81550 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001461394736842105, + "loss": 0.5792, + "step": 81560 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014606052631578945, + "loss": 0.5893, + "step": 81570 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014598947368421052, + "loss": 0.5873, + "step": 81580 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014591052631578944, + "loss": 0.5808, + "step": 81590 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014583157894736842, + "loss": 0.5913, + "step": 81600 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014575263157894737, + "loss": 0.5545, + "step": 81610 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014567368421052632, + "loss": 0.557, + "step": 81620 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014559473684210524, + "loss": 0.5605, + "step": 81630 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014551578947368419, + "loss": 0.5748, + "step": 81640 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014543684210526316, + "loss": 0.5755, + "step": 81650 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014535789473684208, + "loss": 0.582, + "step": 81660 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014527894736842103, + "loss": 0.5609, + "step": 81670 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014519999999999998, + "loss": 0.5772, + "step": 81680 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014512105263157893, + "loss": 0.5669, + "step": 81690 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014504210526315788, + "loss": 0.5783, + "step": 81700 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014496315789473682, + "loss": 0.5776, + "step": 81710 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014488421052631577, + "loss": 0.5697, + "step": 81720 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014480526315789472, + "loss": 0.5707, + "step": 81730 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014472631578947367, + "loss": 0.5693, + "step": 81740 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014464736842105262, + "loss": 0.5816, + "step": 81750 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014456842105263157, + "loss": 0.5844, + "step": 81760 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001444894736842105, + "loss": 0.574, + "step": 81770 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014441052631578946, + "loss": 0.5768, + "step": 81780 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001443315789473684, + "loss": 0.5779, + "step": 81790 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014425263157894736, + "loss": 0.5867, + "step": 81800 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001441736842105263, + "loss": 0.5844, + "step": 81810 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014409473684210525, + "loss": 0.5804, + "step": 81820 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001440157894736842, + "loss": 0.5758, + "step": 81830 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014393684210526315, + "loss": 0.5703, + "step": 81840 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001438578947368421, + "loss": 0.5617, + "step": 81850 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014377894736842105, + "loss": 0.5782, + "step": 81860 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014369999999999997, + "loss": 0.5749, + "step": 81870 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014362105263157894, + "loss": 0.5667, + "step": 81880 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001435421052631579, + "loss": 0.5723, + "step": 81890 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014346315789473684, + "loss": 0.5713, + "step": 81900 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014338421052631576, + "loss": 0.5775, + "step": 81910 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001433052631578947, + "loss": 0.5775, + "step": 81920 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014322631578947369, + "loss": 0.5758, + "step": 81930 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014314736842105263, + "loss": 0.5675, + "step": 81940 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014306842105263156, + "loss": 0.5707, + "step": 81950 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001429894736842105, + "loss": 0.5756, + "step": 81960 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014291052631578945, + "loss": 0.5799, + "step": 81970 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001428315789473684, + "loss": 0.5749, + "step": 81980 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014275263157894735, + "loss": 0.5787, + "step": 81990 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001426736842105263, + "loss": 0.5879, + "step": 82000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014259473684210525, + "loss": 0.5665, + "step": 82010 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001425157894736842, + "loss": 0.5831, + "step": 82020 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014243684210526314, + "loss": 0.5762, + "step": 82030 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001423578947368421, + "loss": 0.5659, + "step": 82040 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014227894736842104, + "loss": 0.5767, + "step": 82050 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001422, + "loss": 0.5791, + "step": 82060 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014212105263157894, + "loss": 0.5824, + "step": 82070 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014204210526315788, + "loss": 0.5663, + "step": 82080 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014196315789473683, + "loss": 0.5722, + "step": 82090 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014188421052631578, + "loss": 0.5732, + "step": 82100 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014180526315789473, + "loss": 0.5631, + "step": 82110 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014172631578947368, + "loss": 0.5808, + "step": 82120 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014164736842105263, + "loss": 0.5758, + "step": 82130 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014156842105263157, + "loss": 0.5772, + "step": 82140 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001414894736842105, + "loss": 0.5568, + "step": 82150 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014141052631578947, + "loss": 0.5579, + "step": 82160 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014133157894736842, + "loss": 0.557, + "step": 82170 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014125263157894737, + "loss": 0.5525, + "step": 82180 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001411736842105263, + "loss": 0.5627, + "step": 82190 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014109473684210524, + "loss": 0.5533, + "step": 82200 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001410157894736842, + "loss": 0.5594, + "step": 82210 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014093684210526316, + "loss": 0.5646, + "step": 82220 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014085789473684208, + "loss": 0.5733, + "step": 82230 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014077894736842103, + "loss": 0.5598, + "step": 82240 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014069999999999998, + "loss": 0.5533, + "step": 82250 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014062105263157895, + "loss": 0.5643, + "step": 82260 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014054210526315787, + "loss": 0.5852, + "step": 82270 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014046315789473682, + "loss": 0.5951, + "step": 82280 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014038421052631577, + "loss": 0.5833, + "step": 82290 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014030526315789472, + "loss": 0.5765, + "step": 82300 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014022631578947367, + "loss": 0.569, + "step": 82310 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014014736842105262, + "loss": 0.5765, + "step": 82320 + }, + { + "epoch": 0.82, + "learning_rate": 0.00014006842105263156, + "loss": 0.5693, + "step": 82330 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001399894736842105, + "loss": 0.5802, + "step": 82340 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013991052631578946, + "loss": 0.5775, + "step": 82350 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001398315789473684, + "loss": 0.5754, + "step": 82360 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013975263157894736, + "loss": 0.5753, + "step": 82370 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001396736842105263, + "loss": 0.5813, + "step": 82380 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013959473684210525, + "loss": 0.5823, + "step": 82390 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001395157894736842, + "loss": 0.5783, + "step": 82400 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013943684210526315, + "loss": 0.5688, + "step": 82410 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001393578947368421, + "loss": 0.584, + "step": 82420 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013927894736842105, + "loss": 0.5766, + "step": 82430 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001392, + "loss": 0.5804, + "step": 82440 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013912105263157894, + "loss": 0.587, + "step": 82450 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001390421052631579, + "loss": 0.58, + "step": 82460 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001389631578947368, + "loss": 0.5762, + "step": 82470 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013888421052631576, + "loss": 0.5972, + "step": 82480 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013880526315789474, + "loss": 0.5743, + "step": 82490 + }, + { + "epoch": 0.82, + "learning_rate": 0.00013872631578947369, + "loss": 0.5889, + "step": 82500 + }, + { + "epoch": 0.82, + "eval_accuracy": 0.8794982706220447, + "eval_loss": 0.5517578125, + "eval_runtime": 100.2401, + "eval_samples_per_second": 798.084, + "eval_steps_per_second": 1.566, + "step": 82500 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001386473684210526, + "loss": 0.5757, + "step": 82510 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013856842105263155, + "loss": 0.5756, + "step": 82520 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013848947368421053, + "loss": 0.5738, + "step": 82530 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013841052631578948, + "loss": 0.5746, + "step": 82540 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013833947368421053, + "loss": 0.5783, + "step": 82550 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013826052631578947, + "loss": 0.5797, + "step": 82560 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013818157894736842, + "loss": 0.5635, + "step": 82570 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013810263157894734, + "loss": 0.5661, + "step": 82580 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001380236842105263, + "loss": 0.5643, + "step": 82590 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013794473684210527, + "loss": 0.5672, + "step": 82600 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001378657894736842, + "loss": 0.5687, + "step": 82610 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013778684210526314, + "loss": 0.5608, + "step": 82620 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013770789473684209, + "loss": 0.5723, + "step": 82630 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013762894736842103, + "loss": 0.5653, + "step": 82640 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013754999999999998, + "loss": 0.5741, + "step": 82650 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013747105263157893, + "loss": 0.5806, + "step": 82660 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013739210526315788, + "loss": 0.5871, + "step": 82670 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013731315789473683, + "loss": 0.5728, + "step": 82680 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013723421052631578, + "loss": 0.5813, + "step": 82690 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013715526315789472, + "loss": 0.589, + "step": 82700 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013707631578947367, + "loss": 0.5815, + "step": 82710 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013699736842105262, + "loss": 0.5947, + "step": 82720 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013691842105263157, + "loss": 0.575, + "step": 82730 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013683947368421052, + "loss": 0.5756, + "step": 82740 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013676052631578947, + "loss": 0.5771, + "step": 82750 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013668157894736841, + "loss": 0.5708, + "step": 82760 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013660263157894736, + "loss": 0.5732, + "step": 82770 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013652368421052628, + "loss": 0.5662, + "step": 82780 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013644473684210526, + "loss": 0.5629, + "step": 82790 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001363657894736842, + "loss": 0.5633, + "step": 82800 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013628684210526316, + "loss": 0.5677, + "step": 82810 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013620789473684208, + "loss": 0.5756, + "step": 82820 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013612894736842105, + "loss": 0.5683, + "step": 82830 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013605, + "loss": 0.5652, + "step": 82840 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013597105263157895, + "loss": 0.5726, + "step": 82850 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013589210526315787, + "loss": 0.5673, + "step": 82860 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013581315789473682, + "loss": 0.5751, + "step": 82870 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001357342105263158, + "loss": 0.5849, + "step": 82880 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013565526315789471, + "loss": 0.5781, + "step": 82890 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013557631578947366, + "loss": 0.5906, + "step": 82900 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001354973684210526, + "loss": 0.5825, + "step": 82910 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013541842105263156, + "loss": 0.5814, + "step": 82920 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001353394736842105, + "loss": 0.5805, + "step": 82930 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013526052631578946, + "loss": 0.59, + "step": 82940 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001351815789473684, + "loss": 0.5801, + "step": 82950 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013510263157894735, + "loss": 0.5804, + "step": 82960 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001350236842105263, + "loss": 0.5688, + "step": 82970 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013494473684210525, + "loss": 0.5784, + "step": 82980 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001348657894736842, + "loss": 0.5782, + "step": 82990 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013478684210526315, + "loss": 0.5714, + "step": 83000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001347078947368421, + "loss": 0.5778, + "step": 83010 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013462894736842104, + "loss": 0.5644, + "step": 83020 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013455, + "loss": 0.5734, + "step": 83030 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013447105263157894, + "loss": 0.5741, + "step": 83040 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001343921052631579, + "loss": 0.571, + "step": 83050 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013431315789473684, + "loss": 0.57, + "step": 83060 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013423421052631578, + "loss": 0.5812, + "step": 83070 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013415526315789473, + "loss": 0.5815, + "step": 83080 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013407631578947368, + "loss": 0.5782, + "step": 83090 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001339973684210526, + "loss": 0.5726, + "step": 83100 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013391842105263158, + "loss": 0.5714, + "step": 83110 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013383947368421053, + "loss": 0.5716, + "step": 83120 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013376052631578947, + "loss": 0.5585, + "step": 83130 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001336815789473684, + "loss": 0.5718, + "step": 83140 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013360263157894734, + "loss": 0.5602, + "step": 83150 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013352368421052632, + "loss": 0.5593, + "step": 83160 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013344473684210527, + "loss": 0.5625, + "step": 83170 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001333657894736842, + "loss": 0.572, + "step": 83180 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013328684210526314, + "loss": 0.5735, + "step": 83190 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013320789473684208, + "loss": 0.5645, + "step": 83200 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013312894736842103, + "loss": 0.5663, + "step": 83210 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013304999999999998, + "loss": 0.5636, + "step": 83220 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013297105263157893, + "loss": 0.5556, + "step": 83230 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013289210526315788, + "loss": 0.5663, + "step": 83240 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013281315789473683, + "loss": 0.5591, + "step": 83250 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013273421052631577, + "loss": 0.5604, + "step": 83260 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013265526315789472, + "loss": 0.5677, + "step": 83270 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013257631578947367, + "loss": 0.5581, + "step": 83280 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013249736842105262, + "loss": 0.5548, + "step": 83290 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013241842105263157, + "loss": 0.5691, + "step": 83300 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013233947368421052, + "loss": 0.5538, + "step": 83310 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013226052631578946, + "loss": 0.563, + "step": 83320 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001321815789473684, + "loss": 0.5574, + "step": 83330 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013210263157894736, + "loss": 0.5488, + "step": 83340 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001320236842105263, + "loss": 0.5604, + "step": 83350 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013194473684210526, + "loss": 0.562, + "step": 83360 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001318657894736842, + "loss": 0.5502, + "step": 83370 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013178684210526313, + "loss": 0.5579, + "step": 83380 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001317078947368421, + "loss": 0.5547, + "step": 83390 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013162894736842105, + "loss": 0.5577, + "step": 83400 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013155, + "loss": 0.5585, + "step": 83410 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013147105263157892, + "loss": 0.5655, + "step": 83420 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013139210526315787, + "loss": 0.5482, + "step": 83430 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013131315789473684, + "loss": 0.5582, + "step": 83440 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001312342105263158, + "loss": 0.5532, + "step": 83450 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013115526315789471, + "loss": 0.5531, + "step": 83460 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013107631578947366, + "loss": 0.5593, + "step": 83470 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001309973684210526, + "loss": 0.5784, + "step": 83480 + }, + { + "epoch": 0.83, + "learning_rate": 0.00013091842105263159, + "loss": 0.5901, + "step": 83490 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001308394736842105, + "loss": 0.5839, + "step": 83500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013076052631578946, + "loss": 0.5809, + "step": 83510 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001306815789473684, + "loss": 0.5645, + "step": 83520 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013060263157894735, + "loss": 0.557, + "step": 83530 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001305236842105263, + "loss": 0.5608, + "step": 83540 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013044473684210525, + "loss": 0.5725, + "step": 83550 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001303657894736842, + "loss": 0.5722, + "step": 83560 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013028684210526314, + "loss": 0.5826, + "step": 83570 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001302078947368421, + "loss": 0.5803, + "step": 83580 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013012894736842104, + "loss": 0.5769, + "step": 83590 + }, + { + "epoch": 0.84, + "learning_rate": 0.00013005, + "loss": 0.5756, + "step": 83600 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012997105263157894, + "loss": 0.5828, + "step": 83610 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012989210526315789, + "loss": 0.5682, + "step": 83620 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012981315789473683, + "loss": 0.5652, + "step": 83630 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012973421052631578, + "loss": 0.5668, + "step": 83640 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012965526315789473, + "loss": 0.5774, + "step": 83650 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012957631578947368, + "loss": 0.5789, + "step": 83660 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012949736842105263, + "loss": 0.5753, + "step": 83670 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012941842105263158, + "loss": 0.5741, + "step": 83680 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012933947368421052, + "loss": 0.577, + "step": 83690 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012926052631578945, + "loss": 0.5776, + "step": 83700 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001291815789473684, + "loss": 0.581, + "step": 83710 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012910263157894737, + "loss": 0.5839, + "step": 83720 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012902368421052632, + "loss": 0.5858, + "step": 83730 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012894473684210524, + "loss": 0.5822, + "step": 83740 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001288657894736842, + "loss": 0.5686, + "step": 83750 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012878684210526314, + "loss": 0.5732, + "step": 83760 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001287078947368421, + "loss": 0.5713, + "step": 83770 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012862894736842103, + "loss": 0.5744, + "step": 83780 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012854999999999998, + "loss": 0.5802, + "step": 83790 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012847105263157893, + "loss": 0.5658, + "step": 83800 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012839210526315788, + "loss": 0.5678, + "step": 83810 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012831315789473683, + "loss": 0.5731, + "step": 83820 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012823421052631577, + "loss": 0.567, + "step": 83830 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012815526315789472, + "loss": 0.5827, + "step": 83840 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012807631578947367, + "loss": 0.5555, + "step": 83850 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012799736842105262, + "loss": 0.5572, + "step": 83860 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012791842105263157, + "loss": 0.5613, + "step": 83870 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012783947368421052, + "loss": 0.5544, + "step": 83880 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012776052631578946, + "loss": 0.5537, + "step": 83890 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001276815789473684, + "loss": 0.5513, + "step": 83900 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012760263157894736, + "loss": 0.5503, + "step": 83910 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001275236842105263, + "loss": 0.5586, + "step": 83920 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012744473684210526, + "loss": 0.5536, + "step": 83930 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001273657894736842, + "loss": 0.559, + "step": 83940 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012728684210526315, + "loss": 0.5508, + "step": 83950 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001272078947368421, + "loss": 0.5663, + "step": 83960 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012712894736842105, + "loss": 0.5546, + "step": 83970 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012705, + "loss": 0.5634, + "step": 83980 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012697105263157892, + "loss": 0.551, + "step": 83990 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001268921052631579, + "loss": 0.563, + "step": 84000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012681315789473684, + "loss": 0.5495, + "step": 84010 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012673421052631576, + "loss": 0.5614, + "step": 84020 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001266552631578947, + "loss": 0.5684, + "step": 84030 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001265763157894737, + "loss": 0.5754, + "step": 84040 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012649736842105264, + "loss": 0.5728, + "step": 84050 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012641842105263156, + "loss": 0.577, + "step": 84060 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001263394736842105, + "loss": 0.5716, + "step": 84070 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012626052631578945, + "loss": 0.5776, + "step": 84080 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012618157894736843, + "loss": 0.5699, + "step": 84090 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012610263157894735, + "loss": 0.5577, + "step": 84100 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001260236842105263, + "loss": 0.5633, + "step": 84110 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012594473684210525, + "loss": 0.5601, + "step": 84120 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001258657894736842, + "loss": 0.5728, + "step": 84130 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012578684210526314, + "loss": 0.5721, + "step": 84140 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001257078947368421, + "loss": 0.5712, + "step": 84150 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012562894736842104, + "loss": 0.5693, + "step": 84160 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012555, + "loss": 0.5685, + "step": 84170 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012547105263157894, + "loss": 0.5685, + "step": 84180 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012539210526315789, + "loss": 0.5807, + "step": 84190 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012531315789473683, + "loss": 0.5684, + "step": 84200 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012523421052631578, + "loss": 0.5761, + "step": 84210 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012515526315789473, + "loss": 0.5613, + "step": 84220 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012507631578947368, + "loss": 0.5687, + "step": 84230 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012499736842105263, + "loss": 0.5559, + "step": 84240 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012491842105263158, + "loss": 0.5647, + "step": 84250 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012483947368421052, + "loss": 0.5591, + "step": 84260 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012476052631578944, + "loss": 0.5701, + "step": 84270 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012468157894736842, + "loss": 0.5585, + "step": 84280 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012460263157894737, + "loss": 0.5514, + "step": 84290 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012452368421052632, + "loss": 0.5582, + "step": 84300 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012444473684210524, + "loss": 0.562, + "step": 84310 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001243657894736842, + "loss": 0.564, + "step": 84320 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012428684210526316, + "loss": 0.5697, + "step": 84330 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012420789473684208, + "loss": 0.5632, + "step": 84340 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012412894736842103, + "loss": 0.5635, + "step": 84350 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012404999999999998, + "loss": 0.5632, + "step": 84360 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012397105263157895, + "loss": 0.5672, + "step": 84370 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012389210526315788, + "loss": 0.5793, + "step": 84380 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012381315789473682, + "loss": 0.5808, + "step": 84390 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012373421052631577, + "loss": 0.564, + "step": 84400 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012365526315789472, + "loss": 0.5755, + "step": 84410 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012357631578947367, + "loss": 0.5771, + "step": 84420 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012349736842105262, + "loss": 0.5823, + "step": 84430 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012341842105263157, + "loss": 0.5742, + "step": 84440 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012333947368421051, + "loss": 0.5693, + "step": 84450 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012326052631578946, + "loss": 0.567, + "step": 84460 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001231815789473684, + "loss": 0.5529, + "step": 84470 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012310263157894736, + "loss": 0.5767, + "step": 84480 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001230236842105263, + "loss": 0.5642, + "step": 84490 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012294473684210526, + "loss": 0.5659, + "step": 84500 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001228657894736842, + "loss": 0.559, + "step": 84510 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012278684210526315, + "loss": 0.5606, + "step": 84520 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001227078947368421, + "loss": 0.5623, + "step": 84530 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012262894736842105, + "loss": 0.5589, + "step": 84540 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001225578947368421, + "loss": 0.5546, + "step": 84550 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012247894736842105, + "loss": 0.5621, + "step": 84560 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001224, + "loss": 0.5675, + "step": 84570 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012232105263157894, + "loss": 0.5709, + "step": 84580 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001222421052631579, + "loss": 0.5649, + "step": 84590 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012216315789473684, + "loss": 0.5662, + "step": 84600 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012208421052631576, + "loss": 0.567, + "step": 84610 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012200526315789474, + "loss": 0.574, + "step": 84620 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012192631578947368, + "loss": 0.5738, + "step": 84630 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012184736842105262, + "loss": 0.5599, + "step": 84640 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012176842105263157, + "loss": 0.573, + "step": 84650 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012168947368421051, + "loss": 0.5676, + "step": 84660 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012161052631578946, + "loss": 0.576, + "step": 84670 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012153157894736841, + "loss": 0.5811, + "step": 84680 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012145263157894736, + "loss": 0.5674, + "step": 84690 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001213736842105263, + "loss": 0.5682, + "step": 84700 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012129473684210524, + "loss": 0.5699, + "step": 84710 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001212157894736842, + "loss": 0.5615, + "step": 84720 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012113684210526315, + "loss": 0.5685, + "step": 84730 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012105789473684209, + "loss": 0.5693, + "step": 84740 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012097894736842104, + "loss": 0.5577, + "step": 84750 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001209, + "loss": 0.5568, + "step": 84760 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012082105263157895, + "loss": 0.5626, + "step": 84770 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012074210526315788, + "loss": 0.556, + "step": 84780 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012066315789473683, + "loss": 0.5557, + "step": 84790 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012058421052631578, + "loss": 0.5754, + "step": 84800 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012050526315789473, + "loss": 0.5728, + "step": 84810 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012042631578947367, + "loss": 0.5641, + "step": 84820 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012034736842105262, + "loss": 0.5645, + "step": 84830 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012026842105263157, + "loss": 0.5695, + "step": 84840 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001201894736842105, + "loss": 0.5729, + "step": 84850 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012011052631578947, + "loss": 0.5728, + "step": 84860 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012003157894736842, + "loss": 0.5681, + "step": 84870 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011995263157894735, + "loss": 0.5715, + "step": 84880 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001198736842105263, + "loss": 0.5862, + "step": 84890 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011979473684210526, + "loss": 0.5762, + "step": 84900 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011971578947368421, + "loss": 0.5818, + "step": 84910 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011963684210526314, + "loss": 0.5682, + "step": 84920 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011955789473684209, + "loss": 0.5733, + "step": 84930 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011947894736842104, + "loss": 0.5591, + "step": 84940 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001194, + "loss": 0.5695, + "step": 84950 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011932105263157894, + "loss": 0.5686, + "step": 84960 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011924210526315789, + "loss": 0.5588, + "step": 84970 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011916315789473683, + "loss": 0.5544, + "step": 84980 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011908421052631577, + "loss": 0.5651, + "step": 84990 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011900526315789473, + "loss": 0.5688, + "step": 85000 + }, + { + "epoch": 0.85, + "eval_accuracy": 0.8806242189602479, + "eval_loss": 0.54638671875, + "eval_runtime": 98.7484, + "eval_samples_per_second": 810.14, + "eval_steps_per_second": 1.59, + "step": 85000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011892631578947368, + "loss": 0.5437, + "step": 85010 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011884736842105261, + "loss": 0.5461, + "step": 85020 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011876842105263156, + "loss": 0.5477, + "step": 85030 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011868947368421052, + "loss": 0.5339, + "step": 85040 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011861052631578947, + "loss": 0.5559, + "step": 85050 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001185315789473684, + "loss": 0.5535, + "step": 85060 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011845263157894735, + "loss": 0.5652, + "step": 85070 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001183736842105263, + "loss": 0.5791, + "step": 85080 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011829473684210526, + "loss": 0.5687, + "step": 85090 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001182157894736842, + "loss": 0.5531, + "step": 85100 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011813684210526315, + "loss": 0.5756, + "step": 85110 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001180578947368421, + "loss": 0.577, + "step": 85120 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011797894736842103, + "loss": 0.5682, + "step": 85130 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011789999999999999, + "loss": 0.5811, + "step": 85140 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011782105263157894, + "loss": 0.5629, + "step": 85150 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011774210526315788, + "loss": 0.5632, + "step": 85160 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011766315789473682, + "loss": 0.5655, + "step": 85170 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011758421052631579, + "loss": 0.5734, + "step": 85180 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011750526315789473, + "loss": 0.5627, + "step": 85190 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011742631578947367, + "loss": 0.5543, + "step": 85200 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011734736842105262, + "loss": 0.5608, + "step": 85210 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011726842105263157, + "loss": 0.5596, + "step": 85220 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011718947368421053, + "loss": 0.5672, + "step": 85230 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011711052631578946, + "loss": 0.5619, + "step": 85240 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011703157894736841, + "loss": 0.5518, + "step": 85250 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011695263157894736, + "loss": 0.5648, + "step": 85260 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011687368421052632, + "loss": 0.5722, + "step": 85270 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011679473684210526, + "loss": 0.5628, + "step": 85280 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001167157894736842, + "loss": 0.573, + "step": 85290 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011663684210526315, + "loss": 0.5702, + "step": 85300 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011655789473684209, + "loss": 0.5746, + "step": 85310 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011647894736842105, + "loss": 0.5577, + "step": 85320 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001164, + "loss": 0.5549, + "step": 85330 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011632105263157893, + "loss": 0.5478, + "step": 85340 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011624210526315788, + "loss": 0.567, + "step": 85350 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011616315789473683, + "loss": 0.5715, + "step": 85360 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011608421052631579, + "loss": 0.5782, + "step": 85370 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011600526315789472, + "loss": 0.5762, + "step": 85380 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011592631578947367, + "loss": 0.5778, + "step": 85390 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011584736842105262, + "loss": 0.5692, + "step": 85400 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011576842105263158, + "loss": 0.5604, + "step": 85410 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011568947368421052, + "loss": 0.5752, + "step": 85420 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011561052631578947, + "loss": 0.5626, + "step": 85430 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011553157894736841, + "loss": 0.5703, + "step": 85440 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011545263157894735, + "loss": 0.5747, + "step": 85450 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011537368421052631, + "loss": 0.5706, + "step": 85460 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011529473684210526, + "loss": 0.5577, + "step": 85470 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001152157894736842, + "loss": 0.56, + "step": 85480 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011513684210526314, + "loss": 0.5659, + "step": 85490 + }, + { + "epoch": 0.85, + "learning_rate": 0.00011505789473684209, + "loss": 0.5639, + "step": 85500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011497894736842105, + "loss": 0.5692, + "step": 85510 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011489999999999999, + "loss": 0.5699, + "step": 85520 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011482105263157894, + "loss": 0.5704, + "step": 85530 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011474210526315788, + "loss": 0.5697, + "step": 85540 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011466315789473685, + "loss": 0.572, + "step": 85550 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011459210526315788, + "loss": 0.5686, + "step": 85560 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011451315789473684, + "loss": 0.5621, + "step": 85570 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011443421052631578, + "loss": 0.5729, + "step": 85580 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011435526315789473, + "loss": 0.5724, + "step": 85590 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011427631578947367, + "loss": 0.5769, + "step": 85600 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011419736842105262, + "loss": 0.5802, + "step": 85610 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011411842105263157, + "loss": 0.5811, + "step": 85620 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011403947368421052, + "loss": 0.5598, + "step": 85630 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011396052631578947, + "loss": 0.5693, + "step": 85640 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001138815789473684, + "loss": 0.5682, + "step": 85650 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011380263157894735, + "loss": 0.5734, + "step": 85660 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011372368421052631, + "loss": 0.5715, + "step": 85670 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011364473684210526, + "loss": 0.5658, + "step": 85680 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001135657894736842, + "loss": 0.5767, + "step": 85690 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011348684210526314, + "loss": 0.5685, + "step": 85700 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001134078947368421, + "loss": 0.5574, + "step": 85710 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011332894736842105, + "loss": 0.5676, + "step": 85720 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011324999999999999, + "loss": 0.5559, + "step": 85730 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011317105263157894, + "loss": 0.5546, + "step": 85740 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011309210526315788, + "loss": 0.554, + "step": 85750 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011301315789473683, + "loss": 0.5602, + "step": 85760 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011293421052631578, + "loss": 0.5702, + "step": 85770 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011285526315789473, + "loss": 0.5716, + "step": 85780 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011277631578947366, + "loss": 0.5761, + "step": 85790 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011269736842105261, + "loss": 0.5572, + "step": 85800 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011261842105263157, + "loss": 0.5822, + "step": 85810 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011253947368421052, + "loss": 0.5724, + "step": 85820 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011246052631578946, + "loss": 0.5678, + "step": 85830 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001123815789473684, + "loss": 0.5632, + "step": 85840 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011230263157894737, + "loss": 0.5662, + "step": 85850 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011222368421052632, + "loss": 0.5786, + "step": 85860 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011214473684210525, + "loss": 0.5667, + "step": 85870 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001120657894736842, + "loss": 0.5742, + "step": 85880 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011198684210526315, + "loss": 0.5646, + "step": 85890 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001119078947368421, + "loss": 0.5775, + "step": 85900 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011182894736842104, + "loss": 0.5587, + "step": 85910 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011174999999999999, + "loss": 0.5676, + "step": 85920 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011167105263157893, + "loss": 0.5542, + "step": 85930 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011159210526315788, + "loss": 0.5562, + "step": 85940 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011151315789473684, + "loss": 0.555, + "step": 85950 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011143421052631579, + "loss": 0.5691, + "step": 85960 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011135526315789472, + "loss": 0.5806, + "step": 85970 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011127631578947367, + "loss": 0.5658, + "step": 85980 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011119736842105263, + "loss": 0.5802, + "step": 85990 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011111842105263158, + "loss": 0.5685, + "step": 86000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011103947368421051, + "loss": 0.5868, + "step": 86010 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011096052631578946, + "loss": 0.5774, + "step": 86020 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011088157894736841, + "loss": 0.5792, + "step": 86030 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011080263157894736, + "loss": 0.572, + "step": 86040 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001107236842105263, + "loss": 0.5763, + "step": 86050 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011064473684210525, + "loss": 0.5744, + "step": 86060 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001105657894736842, + "loss": 0.5807, + "step": 86070 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011048684210526314, + "loss": 0.578, + "step": 86080 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001104078947368421, + "loss": 0.5676, + "step": 86090 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011032894736842105, + "loss": 0.5795, + "step": 86100 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011024999999999998, + "loss": 0.5555, + "step": 86110 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011017105263157893, + "loss": 0.5618, + "step": 86120 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011009210526315789, + "loss": 0.572, + "step": 86130 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011001315789473684, + "loss": 0.5662, + "step": 86140 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010993421052631578, + "loss": 0.5739, + "step": 86150 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010985526315789472, + "loss": 0.5603, + "step": 86160 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010977631578947367, + "loss": 0.5634, + "step": 86170 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010969736842105263, + "loss": 0.5684, + "step": 86180 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010961842105263157, + "loss": 0.5711, + "step": 86190 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010953947368421052, + "loss": 0.5731, + "step": 86200 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010946052631578947, + "loss": 0.5663, + "step": 86210 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001093815789473684, + "loss": 0.569, + "step": 86220 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010930263157894736, + "loss": 0.566, + "step": 86230 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010922368421052631, + "loss": 0.5642, + "step": 86240 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010914473684210525, + "loss": 0.5614, + "step": 86250 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001090657894736842, + "loss": 0.554, + "step": 86260 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010898684210526316, + "loss": 0.5498, + "step": 86270 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001089078947368421, + "loss": 0.5544, + "step": 86280 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010882894736842104, + "loss": 0.5617, + "step": 86290 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010874999999999999, + "loss": 0.5575, + "step": 86300 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010867105263157894, + "loss": 0.5585, + "step": 86310 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001085921052631579, + "loss": 0.5603, + "step": 86320 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010851315789473683, + "loss": 0.5498, + "step": 86330 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010843421052631578, + "loss": 0.5574, + "step": 86340 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010835526315789473, + "loss": 0.5768, + "step": 86350 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010827631578947366, + "loss": 0.5804, + "step": 86360 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010819736842105263, + "loss": 0.5593, + "step": 86370 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010811842105263157, + "loss": 0.5625, + "step": 86380 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010803947368421051, + "loss": 0.5592, + "step": 86390 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010796052631578946, + "loss": 0.5645, + "step": 86400 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010788157894736842, + "loss": 0.5598, + "step": 86410 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010780263157894737, + "loss": 0.567, + "step": 86420 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001077236842105263, + "loss": 0.5729, + "step": 86430 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010764473684210525, + "loss": 0.5636, + "step": 86440 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001075657894736842, + "loss": 0.5656, + "step": 86450 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010748684210526316, + "loss": 0.571, + "step": 86460 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001074078947368421, + "loss": 0.5764, + "step": 86470 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010732894736842104, + "loss": 0.5652, + "step": 86480 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010724999999999999, + "loss": 0.5716, + "step": 86490 + }, + { + "epoch": 0.86, + "learning_rate": 0.00010717105263157893, + "loss": 0.572, + "step": 86500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010709210526315789, + "loss": 0.5799, + "step": 86510 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010701315789473684, + "loss": 0.5555, + "step": 86520 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010693421052631578, + "loss": 0.568, + "step": 86530 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010685526315789472, + "loss": 0.5589, + "step": 86540 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010677631578947368, + "loss": 0.5774, + "step": 86550 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010670526315789472, + "loss": 0.5726, + "step": 86560 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010662631578947368, + "loss": 0.5817, + "step": 86570 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010654736842105263, + "loss": 0.5636, + "step": 86580 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010646842105263157, + "loss": 0.5581, + "step": 86590 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010638947368421051, + "loss": 0.5577, + "step": 86600 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010631052631578946, + "loss": 0.5483, + "step": 86610 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010623157894736842, + "loss": 0.5685, + "step": 86620 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010615263157894737, + "loss": 0.5518, + "step": 86630 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001060736842105263, + "loss": 0.5655, + "step": 86640 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010599473684210525, + "loss": 0.5541, + "step": 86650 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010591578947368421, + "loss": 0.5508, + "step": 86660 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010583684210526315, + "loss": 0.5664, + "step": 86670 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001057578947368421, + "loss": 0.5632, + "step": 86680 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010567894736842104, + "loss": 0.5748, + "step": 86690 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010559999999999998, + "loss": 0.5573, + "step": 86700 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010552105263157894, + "loss": 0.5541, + "step": 86710 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010544210526315789, + "loss": 0.5653, + "step": 86720 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010536315789473684, + "loss": 0.5746, + "step": 86730 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010528421052631577, + "loss": 0.5615, + "step": 86740 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010520526315789472, + "loss": 0.5608, + "step": 86750 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010512631578947368, + "loss": 0.5663, + "step": 86760 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010504736842105263, + "loss": 0.5796, + "step": 86770 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010496842105263156, + "loss": 0.5762, + "step": 86780 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010488947368421051, + "loss": 0.5784, + "step": 86790 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010481052631578947, + "loss": 0.5815, + "step": 86800 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010473157894736841, + "loss": 0.5618, + "step": 86810 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010465263157894736, + "loss": 0.563, + "step": 86820 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001045736842105263, + "loss": 0.5587, + "step": 86830 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010449473684210525, + "loss": 0.5658, + "step": 86840 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001044157894736842, + "loss": 0.5656, + "step": 86850 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010433684210526315, + "loss": 0.5544, + "step": 86860 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001042578947368421, + "loss": 0.5633, + "step": 86870 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010417894736842103, + "loss": 0.5526, + "step": 86880 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010409999999999998, + "loss": 0.5532, + "step": 86890 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010402105263157894, + "loss": 0.5569, + "step": 86900 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010394210526315789, + "loss": 0.5502, + "step": 86910 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010386315789473683, + "loss": 0.557, + "step": 86920 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010378421052631578, + "loss": 0.5648, + "step": 86930 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010370526315789474, + "loss": 0.5772, + "step": 86940 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010362631578947369, + "loss": 0.5599, + "step": 86950 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010354736842105262, + "loss": 0.5584, + "step": 86960 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010346842105263157, + "loss": 0.5611, + "step": 86970 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010338947368421052, + "loss": 0.5718, + "step": 86980 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010331052631578947, + "loss": 0.5668, + "step": 86990 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010323157894736841, + "loss": 0.5624, + "step": 87000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010315263157894736, + "loss": 0.5586, + "step": 87010 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001030736842105263, + "loss": 0.5577, + "step": 87020 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010299473684210525, + "loss": 0.56, + "step": 87030 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010291578947368421, + "loss": 0.5702, + "step": 87040 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010283684210526316, + "loss": 0.565, + "step": 87050 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010275789473684209, + "loss": 0.5633, + "step": 87060 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010267894736842104, + "loss": 0.5588, + "step": 87070 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001026, + "loss": 0.5518, + "step": 87080 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010252105263157895, + "loss": 0.5547, + "step": 87090 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010244210526315788, + "loss": 0.5612, + "step": 87100 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010236315789473683, + "loss": 0.5587, + "step": 87110 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010228421052631578, + "loss": 0.5513, + "step": 87120 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010220526315789473, + "loss": 0.5647, + "step": 87130 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010212631578947368, + "loss": 0.557, + "step": 87140 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010204736842105262, + "loss": 0.5613, + "step": 87150 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010196842105263156, + "loss": 0.5687, + "step": 87160 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010188947368421051, + "loss": 0.5627, + "step": 87170 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010181052631578947, + "loss": 0.5628, + "step": 87180 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010173157894736842, + "loss": 0.5652, + "step": 87190 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010165263157894735, + "loss": 0.5752, + "step": 87200 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001015736842105263, + "loss": 0.561, + "step": 87210 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010149473684210526, + "loss": 0.5793, + "step": 87220 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010141578947368421, + "loss": 0.5658, + "step": 87230 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010133684210526315, + "loss": 0.5796, + "step": 87240 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001012578947368421, + "loss": 0.5635, + "step": 87250 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010117894736842104, + "loss": 0.563, + "step": 87260 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001011, + "loss": 0.5685, + "step": 87270 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010102105263157894, + "loss": 0.5507, + "step": 87280 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010094210526315789, + "loss": 0.5601, + "step": 87290 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010086315789473684, + "loss": 0.553, + "step": 87300 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010078421052631577, + "loss": 0.5715, + "step": 87310 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010070526315789473, + "loss": 0.5684, + "step": 87320 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010062631578947368, + "loss": 0.5795, + "step": 87330 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010054736842105262, + "loss": 0.568, + "step": 87340 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010046842105263156, + "loss": 0.5803, + "step": 87350 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010038947368421053, + "loss": 0.5711, + "step": 87360 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010031052631578947, + "loss": 0.5625, + "step": 87370 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010023157894736841, + "loss": 0.5512, + "step": 87380 + }, + { + "epoch": 0.87, + "learning_rate": 0.00010015263157894736, + "loss": 0.5513, + "step": 87390 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001000736842105263, + "loss": 0.5541, + "step": 87400 + }, + { + "epoch": 0.87, + "learning_rate": 9.999473684210527e-05, + "loss": 0.5676, + "step": 87410 + }, + { + "epoch": 0.87, + "learning_rate": 9.99157894736842e-05, + "loss": 0.5792, + "step": 87420 + }, + { + "epoch": 0.87, + "learning_rate": 9.983684210526315e-05, + "loss": 0.5725, + "step": 87430 + }, + { + "epoch": 0.87, + "learning_rate": 9.97578947368421e-05, + "loss": 0.5694, + "step": 87440 + }, + { + "epoch": 0.87, + "learning_rate": 9.967894736842103e-05, + "loss": 0.5583, + "step": 87450 + }, + { + "epoch": 0.87, + "learning_rate": 9.96e-05, + "loss": 0.5669, + "step": 87460 + }, + { + "epoch": 0.87, + "learning_rate": 9.952105263157894e-05, + "loss": 0.571, + "step": 87470 + }, + { + "epoch": 0.87, + "learning_rate": 9.944210526315788e-05, + "loss": 0.5622, + "step": 87480 + }, + { + "epoch": 0.87, + "learning_rate": 9.936315789473683e-05, + "loss": 0.5697, + "step": 87490 + }, + { + "epoch": 0.88, + "learning_rate": 9.928421052631579e-05, + "loss": 0.5629, + "step": 87500 + }, + { + "epoch": 0.88, + "eval_accuracy": 0.8814306303350578, + "eval_loss": 0.541015625, + "eval_runtime": 97.7663, + "eval_samples_per_second": 818.278, + "eval_steps_per_second": 1.606, + "step": 87500 + }, + { + "epoch": 0.88, + "learning_rate": 9.920526315789474e-05, + "loss": 0.5606, + "step": 87510 + }, + { + "epoch": 0.88, + "learning_rate": 9.912631578947367e-05, + "loss": 0.5534, + "step": 87520 + }, + { + "epoch": 0.88, + "learning_rate": 9.904736842105262e-05, + "loss": 0.5601, + "step": 87530 + }, + { + "epoch": 0.88, + "learning_rate": 9.896842105263157e-05, + "loss": 0.5721, + "step": 87540 + }, + { + "epoch": 0.88, + "learning_rate": 9.888947368421053e-05, + "loss": 0.5701, + "step": 87550 + }, + { + "epoch": 0.88, + "learning_rate": 9.881842105263156e-05, + "loss": 0.5598, + "step": 87560 + }, + { + "epoch": 0.88, + "learning_rate": 9.873947368421053e-05, + "loss": 0.57, + "step": 87570 + }, + { + "epoch": 0.88, + "learning_rate": 9.866052631578946e-05, + "loss": 0.5681, + "step": 87580 + }, + { + "epoch": 0.88, + "learning_rate": 9.858157894736841e-05, + "loss": 0.5678, + "step": 87590 + }, + { + "epoch": 0.88, + "learning_rate": 9.850263157894736e-05, + "loss": 0.5571, + "step": 87600 + }, + { + "epoch": 0.88, + "learning_rate": 9.84236842105263e-05, + "loss": 0.5652, + "step": 87610 + }, + { + "epoch": 0.88, + "learning_rate": 9.834473684210525e-05, + "loss": 0.5726, + "step": 87620 + }, + { + "epoch": 0.88, + "learning_rate": 9.82657894736842e-05, + "loss": 0.565, + "step": 87630 + }, + { + "epoch": 0.88, + "learning_rate": 9.818684210526315e-05, + "loss": 0.5593, + "step": 87640 + }, + { + "epoch": 0.88, + "learning_rate": 9.810789473684209e-05, + "loss": 0.5588, + "step": 87650 + }, + { + "epoch": 0.88, + "learning_rate": 9.802894736842105e-05, + "loss": 0.5545, + "step": 87660 + }, + { + "epoch": 0.88, + "learning_rate": 9.795e-05, + "loss": 0.5685, + "step": 87670 + }, + { + "epoch": 0.88, + "learning_rate": 9.787105263157894e-05, + "loss": 0.5628, + "step": 87680 + }, + { + "epoch": 0.88, + "learning_rate": 9.779210526315788e-05, + "loss": 0.5589, + "step": 87690 + }, + { + "epoch": 0.88, + "learning_rate": 9.771315789473683e-05, + "loss": 0.5618, + "step": 87700 + }, + { + "epoch": 0.88, + "learning_rate": 9.763421052631579e-05, + "loss": 0.5652, + "step": 87710 + }, + { + "epoch": 0.88, + "learning_rate": 9.755526315789474e-05, + "loss": 0.5586, + "step": 87720 + }, + { + "epoch": 0.88, + "learning_rate": 9.747631578947367e-05, + "loss": 0.5566, + "step": 87730 + }, + { + "epoch": 0.88, + "learning_rate": 9.739736842105262e-05, + "loss": 0.5615, + "step": 87740 + }, + { + "epoch": 0.88, + "learning_rate": 9.731842105263157e-05, + "loss": 0.5643, + "step": 87750 + }, + { + "epoch": 0.88, + "learning_rate": 9.723947368421052e-05, + "loss": 0.5548, + "step": 87760 + }, + { + "epoch": 0.88, + "learning_rate": 9.716052631578947e-05, + "loss": 0.5805, + "step": 87770 + }, + { + "epoch": 0.88, + "learning_rate": 9.708157894736841e-05, + "loss": 0.5654, + "step": 87780 + }, + { + "epoch": 0.88, + "learning_rate": 9.700263157894735e-05, + "loss": 0.573, + "step": 87790 + }, + { + "epoch": 0.88, + "learning_rate": 9.692368421052631e-05, + "loss": 0.5512, + "step": 87800 + }, + { + "epoch": 0.88, + "learning_rate": 9.684473684210526e-05, + "loss": 0.5689, + "step": 87810 + }, + { + "epoch": 0.88, + "learning_rate": 9.67657894736842e-05, + "loss": 0.5723, + "step": 87820 + }, + { + "epoch": 0.88, + "learning_rate": 9.668684210526314e-05, + "loss": 0.5695, + "step": 87830 + }, + { + "epoch": 0.88, + "learning_rate": 9.660789473684209e-05, + "loss": 0.5572, + "step": 87840 + }, + { + "epoch": 0.88, + "learning_rate": 9.652894736842105e-05, + "loss": 0.5727, + "step": 87850 + }, + { + "epoch": 0.88, + "learning_rate": 9.645e-05, + "loss": 0.5558, + "step": 87860 + }, + { + "epoch": 0.88, + "learning_rate": 9.637105263157893e-05, + "loss": 0.5543, + "step": 87870 + }, + { + "epoch": 0.88, + "learning_rate": 9.629210526315788e-05, + "loss": 0.5576, + "step": 87880 + }, + { + "epoch": 0.88, + "learning_rate": 9.621315789473684e-05, + "loss": 0.5601, + "step": 87890 + }, + { + "epoch": 0.88, + "learning_rate": 9.613421052631578e-05, + "loss": 0.5651, + "step": 87900 + }, + { + "epoch": 0.88, + "learning_rate": 9.605526315789473e-05, + "loss": 0.5664, + "step": 87910 + }, + { + "epoch": 0.88, + "learning_rate": 9.597631578947368e-05, + "loss": 0.5646, + "step": 87920 + }, + { + "epoch": 0.88, + "learning_rate": 9.589736842105261e-05, + "loss": 0.5531, + "step": 87930 + }, + { + "epoch": 0.88, + "learning_rate": 9.581842105263157e-05, + "loss": 0.5607, + "step": 87940 + }, + { + "epoch": 0.88, + "learning_rate": 9.573947368421052e-05, + "loss": 0.5465, + "step": 87950 + }, + { + "epoch": 0.88, + "learning_rate": 9.566052631578947e-05, + "loss": 0.5573, + "step": 87960 + }, + { + "epoch": 0.88, + "learning_rate": 9.55815789473684e-05, + "loss": 0.5427, + "step": 87970 + }, + { + "epoch": 0.88, + "learning_rate": 9.550263157894735e-05, + "loss": 0.5453, + "step": 87980 + }, + { + "epoch": 0.88, + "learning_rate": 9.542368421052631e-05, + "loss": 0.5498, + "step": 87990 + }, + { + "epoch": 0.88, + "learning_rate": 9.534473684210526e-05, + "loss": 0.5599, + "step": 88000 + }, + { + "epoch": 0.88, + "learning_rate": 9.52657894736842e-05, + "loss": 0.5659, + "step": 88010 + }, + { + "epoch": 0.88, + "learning_rate": 9.518684210526315e-05, + "loss": 0.5546, + "step": 88020 + }, + { + "epoch": 0.88, + "learning_rate": 9.510789473684211e-05, + "loss": 0.5659, + "step": 88030 + }, + { + "epoch": 0.88, + "learning_rate": 9.502894736842106e-05, + "loss": 0.5566, + "step": 88040 + }, + { + "epoch": 0.88, + "learning_rate": 9.494999999999999e-05, + "loss": 0.5683, + "step": 88050 + }, + { + "epoch": 0.88, + "learning_rate": 9.487105263157894e-05, + "loss": 0.5522, + "step": 88060 + }, + { + "epoch": 0.88, + "learning_rate": 9.479210526315789e-05, + "loss": 0.5766, + "step": 88070 + }, + { + "epoch": 0.88, + "learning_rate": 9.471315789473684e-05, + "loss": 0.5714, + "step": 88080 + }, + { + "epoch": 0.88, + "learning_rate": 9.463421052631578e-05, + "loss": 0.5489, + "step": 88090 + }, + { + "epoch": 0.88, + "learning_rate": 9.456315789473685e-05, + "loss": 0.5552, + "step": 88100 + }, + { + "epoch": 0.88, + "learning_rate": 9.448421052631578e-05, + "loss": 0.5597, + "step": 88110 + }, + { + "epoch": 0.88, + "learning_rate": 9.440526315789473e-05, + "loss": 0.5621, + "step": 88120 + }, + { + "epoch": 0.88, + "learning_rate": 9.432631578947368e-05, + "loss": 0.5716, + "step": 88130 + }, + { + "epoch": 0.88, + "learning_rate": 9.424736842105261e-05, + "loss": 0.5607, + "step": 88140 + }, + { + "epoch": 0.88, + "learning_rate": 9.416842105263157e-05, + "loss": 0.5636, + "step": 88150 + }, + { + "epoch": 0.88, + "learning_rate": 9.408947368421052e-05, + "loss": 0.5482, + "step": 88160 + }, + { + "epoch": 0.88, + "learning_rate": 9.401052631578947e-05, + "loss": 0.5493, + "step": 88170 + }, + { + "epoch": 0.88, + "learning_rate": 9.39315789473684e-05, + "loss": 0.5473, + "step": 88180 + }, + { + "epoch": 0.88, + "learning_rate": 9.385263157894737e-05, + "loss": 0.5527, + "step": 88190 + }, + { + "epoch": 0.88, + "learning_rate": 9.377368421052631e-05, + "loss": 0.5507, + "step": 88200 + }, + { + "epoch": 0.88, + "learning_rate": 9.369473684210525e-05, + "loss": 0.5551, + "step": 88210 + }, + { + "epoch": 0.88, + "learning_rate": 9.36157894736842e-05, + "loss": 0.5569, + "step": 88220 + }, + { + "epoch": 0.88, + "learning_rate": 9.353684210526315e-05, + "loss": 0.5631, + "step": 88230 + }, + { + "epoch": 0.88, + "learning_rate": 9.345789473684211e-05, + "loss": 0.5582, + "step": 88240 + }, + { + "epoch": 0.88, + "learning_rate": 9.337894736842104e-05, + "loss": 0.5558, + "step": 88250 + }, + { + "epoch": 0.88, + "learning_rate": 9.329999999999999e-05, + "loss": 0.5542, + "step": 88260 + }, + { + "epoch": 0.88, + "learning_rate": 9.322105263157894e-05, + "loss": 0.5714, + "step": 88270 + }, + { + "epoch": 0.88, + "learning_rate": 9.314210526315787e-05, + "loss": 0.5604, + "step": 88280 + }, + { + "epoch": 0.88, + "learning_rate": 9.306315789473684e-05, + "loss": 0.5693, + "step": 88290 + }, + { + "epoch": 0.88, + "learning_rate": 9.298421052631578e-05, + "loss": 0.5553, + "step": 88300 + }, + { + "epoch": 0.88, + "learning_rate": 9.290526315789473e-05, + "loss": 0.5649, + "step": 88310 + }, + { + "epoch": 0.88, + "learning_rate": 9.282631578947367e-05, + "loss": 0.5605, + "step": 88320 + }, + { + "epoch": 0.88, + "learning_rate": 9.274736842105263e-05, + "loss": 0.5625, + "step": 88330 + }, + { + "epoch": 0.88, + "learning_rate": 9.266842105263158e-05, + "loss": 0.5614, + "step": 88340 + }, + { + "epoch": 0.88, + "learning_rate": 9.258947368421051e-05, + "loss": 0.5655, + "step": 88350 + }, + { + "epoch": 0.88, + "learning_rate": 9.251052631578946e-05, + "loss": 0.5665, + "step": 88360 + }, + { + "epoch": 0.88, + "learning_rate": 9.243157894736841e-05, + "loss": 0.5502, + "step": 88370 + }, + { + "epoch": 0.88, + "learning_rate": 9.235263157894737e-05, + "loss": 0.5712, + "step": 88380 + }, + { + "epoch": 0.88, + "learning_rate": 9.22736842105263e-05, + "loss": 0.5596, + "step": 88390 + }, + { + "epoch": 0.88, + "learning_rate": 9.219473684210525e-05, + "loss": 0.565, + "step": 88400 + }, + { + "epoch": 0.88, + "learning_rate": 9.21157894736842e-05, + "loss": 0.5525, + "step": 88410 + }, + { + "epoch": 0.88, + "learning_rate": 9.203684210526314e-05, + "loss": 0.561, + "step": 88420 + }, + { + "epoch": 0.88, + "learning_rate": 9.19578947368421e-05, + "loss": 0.5502, + "step": 88430 + }, + { + "epoch": 0.88, + "learning_rate": 9.187894736842105e-05, + "loss": 0.5437, + "step": 88440 + }, + { + "epoch": 0.88, + "learning_rate": 9.18e-05, + "loss": 0.5445, + "step": 88450 + }, + { + "epoch": 0.88, + "learning_rate": 9.172105263157893e-05, + "loss": 0.563, + "step": 88460 + }, + { + "epoch": 0.88, + "learning_rate": 9.164210526315789e-05, + "loss": 0.5559, + "step": 88470 + }, + { + "epoch": 0.88, + "learning_rate": 9.156315789473684e-05, + "loss": 0.5428, + "step": 88480 + }, + { + "epoch": 0.88, + "learning_rate": 9.148421052631579e-05, + "loss": 0.5585, + "step": 88490 + }, + { + "epoch": 0.89, + "learning_rate": 9.140526315789472e-05, + "loss": 0.555, + "step": 88500 + }, + { + "epoch": 0.89, + "learning_rate": 9.132631578947367e-05, + "loss": 0.5511, + "step": 88510 + }, + { + "epoch": 0.89, + "learning_rate": 9.124736842105263e-05, + "loss": 0.5623, + "step": 88520 + }, + { + "epoch": 0.89, + "learning_rate": 9.116842105263157e-05, + "loss": 0.5565, + "step": 88530 + }, + { + "epoch": 0.89, + "learning_rate": 9.108947368421052e-05, + "loss": 0.5556, + "step": 88540 + }, + { + "epoch": 0.89, + "learning_rate": 9.101052631578946e-05, + "loss": 0.5597, + "step": 88550 + }, + { + "epoch": 0.89, + "learning_rate": 9.09315789473684e-05, + "loss": 0.5525, + "step": 88560 + }, + { + "epoch": 0.89, + "learning_rate": 9.085263157894736e-05, + "loss": 0.5513, + "step": 88570 + }, + { + "epoch": 0.89, + "learning_rate": 9.077368421052631e-05, + "loss": 0.5645, + "step": 88580 + }, + { + "epoch": 0.89, + "learning_rate": 9.069473684210526e-05, + "loss": 0.565, + "step": 88590 + }, + { + "epoch": 0.89, + "learning_rate": 9.061578947368419e-05, + "loss": 0.5631, + "step": 88600 + }, + { + "epoch": 0.89, + "learning_rate": 9.053684210526315e-05, + "loss": 0.5731, + "step": 88610 + }, + { + "epoch": 0.89, + "learning_rate": 9.04578947368421e-05, + "loss": 0.5634, + "step": 88620 + }, + { + "epoch": 0.89, + "learning_rate": 9.037894736842105e-05, + "loss": 0.5574, + "step": 88630 + }, + { + "epoch": 0.89, + "learning_rate": 9.029999999999999e-05, + "loss": 0.5612, + "step": 88640 + }, + { + "epoch": 0.89, + "learning_rate": 9.022105263157893e-05, + "loss": 0.5519, + "step": 88650 + }, + { + "epoch": 0.89, + "learning_rate": 9.01421052631579e-05, + "loss": 0.5633, + "step": 88660 + }, + { + "epoch": 0.89, + "learning_rate": 9.006315789473683e-05, + "loss": 0.5493, + "step": 88670 + }, + { + "epoch": 0.89, + "learning_rate": 8.998421052631578e-05, + "loss": 0.5427, + "step": 88680 + }, + { + "epoch": 0.89, + "learning_rate": 8.990526315789473e-05, + "loss": 0.5537, + "step": 88690 + }, + { + "epoch": 0.89, + "learning_rate": 8.982631578947366e-05, + "loss": 0.5618, + "step": 88700 + }, + { + "epoch": 0.89, + "learning_rate": 8.974736842105262e-05, + "loss": 0.5594, + "step": 88710 + }, + { + "epoch": 0.89, + "learning_rate": 8.966842105263157e-05, + "loss": 0.5613, + "step": 88720 + }, + { + "epoch": 0.89, + "learning_rate": 8.958947368421052e-05, + "loss": 0.5563, + "step": 88730 + }, + { + "epoch": 0.89, + "learning_rate": 8.951052631578946e-05, + "loss": 0.558, + "step": 88740 + }, + { + "epoch": 0.89, + "learning_rate": 8.943157894736842e-05, + "loss": 0.5611, + "step": 88750 + }, + { + "epoch": 0.89, + "learning_rate": 8.935263157894737e-05, + "loss": 0.5707, + "step": 88760 + }, + { + "epoch": 0.89, + "learning_rate": 8.927368421052631e-05, + "loss": 0.5711, + "step": 88770 + }, + { + "epoch": 0.89, + "learning_rate": 8.919473684210525e-05, + "loss": 0.5677, + "step": 88780 + }, + { + "epoch": 0.89, + "learning_rate": 8.91157894736842e-05, + "loss": 0.5558, + "step": 88790 + }, + { + "epoch": 0.89, + "learning_rate": 8.903684210526316e-05, + "loss": 0.5675, + "step": 88800 + }, + { + "epoch": 0.89, + "learning_rate": 8.895789473684211e-05, + "loss": 0.5635, + "step": 88810 + }, + { + "epoch": 0.89, + "learning_rate": 8.887894736842104e-05, + "loss": 0.5631, + "step": 88820 + }, + { + "epoch": 0.89, + "learning_rate": 8.879999999999999e-05, + "loss": 0.5617, + "step": 88830 + }, + { + "epoch": 0.89, + "learning_rate": 8.872105263157894e-05, + "loss": 0.5618, + "step": 88840 + }, + { + "epoch": 0.89, + "learning_rate": 8.864210526315789e-05, + "loss": 0.5515, + "step": 88850 + }, + { + "epoch": 0.89, + "learning_rate": 8.856315789473683e-05, + "loss": 0.5603, + "step": 88860 + }, + { + "epoch": 0.89, + "learning_rate": 8.848421052631578e-05, + "loss": 0.555, + "step": 88870 + }, + { + "epoch": 0.89, + "learning_rate": 8.840526315789472e-05, + "loss": 0.5655, + "step": 88880 + }, + { + "epoch": 0.89, + "learning_rate": 8.832631578947368e-05, + "loss": 0.544, + "step": 88890 + }, + { + "epoch": 0.89, + "learning_rate": 8.824736842105263e-05, + "loss": 0.5488, + "step": 88900 + }, + { + "epoch": 0.89, + "learning_rate": 8.816842105263158e-05, + "loss": 0.5457, + "step": 88910 + }, + { + "epoch": 0.89, + "learning_rate": 8.808947368421051e-05, + "loss": 0.5529, + "step": 88920 + }, + { + "epoch": 0.89, + "learning_rate": 8.801052631578946e-05, + "loss": 0.545, + "step": 88930 + }, + { + "epoch": 0.89, + "learning_rate": 8.793157894736842e-05, + "loss": 0.5531, + "step": 88940 + }, + { + "epoch": 0.89, + "learning_rate": 8.785263157894737e-05, + "loss": 0.5613, + "step": 88950 + }, + { + "epoch": 0.89, + "learning_rate": 8.77736842105263e-05, + "loss": 0.5542, + "step": 88960 + }, + { + "epoch": 0.89, + "learning_rate": 8.769473684210525e-05, + "loss": 0.5573, + "step": 88970 + }, + { + "epoch": 0.89, + "learning_rate": 8.76157894736842e-05, + "loss": 0.5621, + "step": 88980 + }, + { + "epoch": 0.89, + "learning_rate": 8.753684210526315e-05, + "loss": 0.5609, + "step": 88990 + }, + { + "epoch": 0.89, + "learning_rate": 8.74578947368421e-05, + "loss": 0.5511, + "step": 89000 + }, + { + "epoch": 0.89, + "learning_rate": 8.737894736842105e-05, + "loss": 0.551, + "step": 89010 + }, + { + "epoch": 0.89, + "learning_rate": 8.729999999999998e-05, + "loss": 0.5672, + "step": 89020 + }, + { + "epoch": 0.89, + "learning_rate": 8.722105263157894e-05, + "loss": 0.5635, + "step": 89030 + }, + { + "epoch": 0.89, + "learning_rate": 8.714210526315789e-05, + "loss": 0.5622, + "step": 89040 + }, + { + "epoch": 0.89, + "learning_rate": 8.706315789473684e-05, + "loss": 0.5481, + "step": 89050 + }, + { + "epoch": 0.89, + "learning_rate": 8.698421052631577e-05, + "loss": 0.5586, + "step": 89060 + }, + { + "epoch": 0.89, + "learning_rate": 8.690526315789472e-05, + "loss": 0.5593, + "step": 89070 + }, + { + "epoch": 0.89, + "learning_rate": 8.682631578947368e-05, + "loss": 0.5607, + "step": 89080 + }, + { + "epoch": 0.89, + "learning_rate": 8.674736842105263e-05, + "loss": 0.5487, + "step": 89090 + }, + { + "epoch": 0.89, + "learning_rate": 8.666842105263157e-05, + "loss": 0.5549, + "step": 89100 + }, + { + "epoch": 0.89, + "learning_rate": 8.658947368421052e-05, + "loss": 0.5402, + "step": 89110 + }, + { + "epoch": 0.89, + "learning_rate": 8.651052631578946e-05, + "loss": 0.5462, + "step": 89120 + }, + { + "epoch": 0.89, + "learning_rate": 8.643157894736841e-05, + "loss": 0.5393, + "step": 89130 + }, + { + "epoch": 0.89, + "learning_rate": 8.635263157894736e-05, + "loss": 0.5485, + "step": 89140 + }, + { + "epoch": 0.89, + "learning_rate": 8.627368421052631e-05, + "loss": 0.5348, + "step": 89150 + }, + { + "epoch": 0.89, + "learning_rate": 8.619473684210524e-05, + "loss": 0.5457, + "step": 89160 + }, + { + "epoch": 0.89, + "learning_rate": 8.61157894736842e-05, + "loss": 0.5443, + "step": 89170 + }, + { + "epoch": 0.89, + "learning_rate": 8.603684210526315e-05, + "loss": 0.5416, + "step": 89180 + }, + { + "epoch": 0.89, + "learning_rate": 8.59578947368421e-05, + "loss": 0.5547, + "step": 89190 + }, + { + "epoch": 0.89, + "learning_rate": 8.587894736842104e-05, + "loss": 0.5567, + "step": 89200 + }, + { + "epoch": 0.89, + "learning_rate": 8.579999999999998e-05, + "loss": 0.5465, + "step": 89210 + }, + { + "epoch": 0.89, + "learning_rate": 8.572105263157895e-05, + "loss": 0.5436, + "step": 89220 + }, + { + "epoch": 0.89, + "learning_rate": 8.56421052631579e-05, + "loss": 0.5476, + "step": 89230 + }, + { + "epoch": 0.89, + "learning_rate": 8.556315789473683e-05, + "loss": 0.5521, + "step": 89240 + }, + { + "epoch": 0.89, + "learning_rate": 8.548421052631578e-05, + "loss": 0.5449, + "step": 89250 + }, + { + "epoch": 0.89, + "learning_rate": 8.540526315789473e-05, + "loss": 0.5518, + "step": 89260 + }, + { + "epoch": 0.89, + "learning_rate": 8.532631578947369e-05, + "loss": 0.5415, + "step": 89270 + }, + { + "epoch": 0.89, + "learning_rate": 8.524736842105262e-05, + "loss": 0.5642, + "step": 89280 + }, + { + "epoch": 0.89, + "learning_rate": 8.516842105263157e-05, + "loss": 0.5591, + "step": 89290 + }, + { + "epoch": 0.89, + "learning_rate": 8.508947368421052e-05, + "loss": 0.5477, + "step": 89300 + }, + { + "epoch": 0.89, + "learning_rate": 8.501052631578947e-05, + "loss": 0.5412, + "step": 89310 + }, + { + "epoch": 0.89, + "learning_rate": 8.493157894736842e-05, + "loss": 0.557, + "step": 89320 + }, + { + "epoch": 0.89, + "learning_rate": 8.485263157894736e-05, + "loss": 0.5437, + "step": 89330 + }, + { + "epoch": 0.89, + "learning_rate": 8.47736842105263e-05, + "loss": 0.5496, + "step": 89340 + }, + { + "epoch": 0.89, + "learning_rate": 8.469473684210525e-05, + "loss": 0.5582, + "step": 89350 + }, + { + "epoch": 0.89, + "learning_rate": 8.461578947368421e-05, + "loss": 0.5559, + "step": 89360 + }, + { + "epoch": 0.89, + "learning_rate": 8.453684210526316e-05, + "loss": 0.5559, + "step": 89370 + }, + { + "epoch": 0.89, + "learning_rate": 8.445789473684209e-05, + "loss": 0.5458, + "step": 89380 + }, + { + "epoch": 0.89, + "learning_rate": 8.437894736842104e-05, + "loss": 0.5466, + "step": 89390 + }, + { + "epoch": 0.89, + "learning_rate": 8.43e-05, + "loss": 0.5434, + "step": 89400 + }, + { + "epoch": 0.89, + "learning_rate": 8.422105263157895e-05, + "loss": 0.5571, + "step": 89410 + }, + { + "epoch": 0.89, + "learning_rate": 8.414210526315789e-05, + "loss": 0.5477, + "step": 89420 + }, + { + "epoch": 0.89, + "learning_rate": 8.406315789473683e-05, + "loss": 0.5559, + "step": 89430 + }, + { + "epoch": 0.89, + "learning_rate": 8.398421052631578e-05, + "loss": 0.5469, + "step": 89440 + }, + { + "epoch": 0.89, + "learning_rate": 8.390526315789473e-05, + "loss": 0.5594, + "step": 89450 + }, + { + "epoch": 0.89, + "learning_rate": 8.382631578947368e-05, + "loss": 0.5549, + "step": 89460 + }, + { + "epoch": 0.89, + "learning_rate": 8.374736842105263e-05, + "loss": 0.5616, + "step": 89470 + }, + { + "epoch": 0.89, + "learning_rate": 8.366842105263156e-05, + "loss": 0.5534, + "step": 89480 + }, + { + "epoch": 0.89, + "learning_rate": 8.358947368421051e-05, + "loss": 0.5546, + "step": 89490 + }, + { + "epoch": 0.9, + "learning_rate": 8.351052631578947e-05, + "loss": 0.5519, + "step": 89500 + }, + { + "epoch": 0.9, + "learning_rate": 8.343157894736842e-05, + "loss": 0.5491, + "step": 89510 + }, + { + "epoch": 0.9, + "learning_rate": 8.335263157894736e-05, + "loss": 0.5394, + "step": 89520 + }, + { + "epoch": 0.9, + "learning_rate": 8.32736842105263e-05, + "loss": 0.5497, + "step": 89530 + }, + { + "epoch": 0.9, + "learning_rate": 8.319473684210527e-05, + "loss": 0.56, + "step": 89540 + }, + { + "epoch": 0.9, + "learning_rate": 8.311578947368421e-05, + "loss": 0.5481, + "step": 89550 + }, + { + "epoch": 0.9, + "learning_rate": 8.303684210526315e-05, + "loss": 0.5543, + "step": 89560 + }, + { + "epoch": 0.9, + "learning_rate": 8.29578947368421e-05, + "loss": 0.5625, + "step": 89570 + }, + { + "epoch": 0.9, + "learning_rate": 8.287894736842104e-05, + "loss": 0.5411, + "step": 89580 + }, + { + "epoch": 0.9, + "learning_rate": 8.28e-05, + "loss": 0.5489, + "step": 89590 + }, + { + "epoch": 0.9, + "learning_rate": 8.272105263157894e-05, + "loss": 0.542, + "step": 89600 + }, + { + "epoch": 0.9, + "learning_rate": 8.264210526315789e-05, + "loss": 0.5521, + "step": 89610 + }, + { + "epoch": 0.9, + "learning_rate": 8.256315789473682e-05, + "loss": 0.5438, + "step": 89620 + }, + { + "epoch": 0.9, + "learning_rate": 8.248421052631577e-05, + "loss": 0.5569, + "step": 89630 + }, + { + "epoch": 0.9, + "learning_rate": 8.240526315789473e-05, + "loss": 0.5563, + "step": 89640 + }, + { + "epoch": 0.9, + "learning_rate": 8.232631578947368e-05, + "loss": 0.5735, + "step": 89650 + }, + { + "epoch": 0.9, + "learning_rate": 8.224736842105262e-05, + "loss": 0.5714, + "step": 89660 + }, + { + "epoch": 0.9, + "learning_rate": 8.216842105263157e-05, + "loss": 0.5744, + "step": 89670 + }, + { + "epoch": 0.9, + "learning_rate": 8.208947368421053e-05, + "loss": 0.5674, + "step": 89680 + }, + { + "epoch": 0.9, + "learning_rate": 8.201052631578948e-05, + "loss": 0.5549, + "step": 89690 + }, + { + "epoch": 0.9, + "learning_rate": 8.193157894736841e-05, + "loss": 0.5675, + "step": 89700 + }, + { + "epoch": 0.9, + "learning_rate": 8.185263157894736e-05, + "loss": 0.5662, + "step": 89710 + }, + { + "epoch": 0.9, + "learning_rate": 8.177368421052631e-05, + "loss": 0.5685, + "step": 89720 + }, + { + "epoch": 0.9, + "learning_rate": 8.169473684210527e-05, + "loss": 0.5617, + "step": 89730 + }, + { + "epoch": 0.9, + "learning_rate": 8.16157894736842e-05, + "loss": 0.5657, + "step": 89740 + }, + { + "epoch": 0.9, + "learning_rate": 8.153684210526315e-05, + "loss": 0.5598, + "step": 89750 + }, + { + "epoch": 0.9, + "learning_rate": 8.14578947368421e-05, + "loss": 0.5597, + "step": 89760 + }, + { + "epoch": 0.9, + "learning_rate": 8.137894736842104e-05, + "loss": 0.5574, + "step": 89770 + }, + { + "epoch": 0.9, + "learning_rate": 8.13e-05, + "loss": 0.5665, + "step": 89780 + }, + { + "epoch": 0.9, + "learning_rate": 8.122105263157895e-05, + "loss": 0.568, + "step": 89790 + }, + { + "epoch": 0.9, + "learning_rate": 8.114210526315788e-05, + "loss": 0.5616, + "step": 89800 + }, + { + "epoch": 0.9, + "learning_rate": 8.106315789473683e-05, + "loss": 0.5512, + "step": 89810 + }, + { + "epoch": 0.9, + "learning_rate": 8.098421052631579e-05, + "loss": 0.5602, + "step": 89820 + }, + { + "epoch": 0.9, + "learning_rate": 8.090526315789474e-05, + "loss": 0.5565, + "step": 89830 + }, + { + "epoch": 0.9, + "learning_rate": 8.082631578947367e-05, + "loss": 0.5572, + "step": 89840 + }, + { + "epoch": 0.9, + "learning_rate": 8.074736842105262e-05, + "loss": 0.5627, + "step": 89850 + }, + { + "epoch": 0.9, + "learning_rate": 8.066842105263157e-05, + "loss": 0.5689, + "step": 89860 + }, + { + "epoch": 0.9, + "learning_rate": 8.058947368421053e-05, + "loss": 0.5642, + "step": 89870 + }, + { + "epoch": 0.9, + "learning_rate": 8.051052631578947e-05, + "loss": 0.5434, + "step": 89880 + }, + { + "epoch": 0.9, + "learning_rate": 8.043157894736842e-05, + "loss": 0.5498, + "step": 89890 + }, + { + "epoch": 0.9, + "learning_rate": 8.035263157894736e-05, + "loss": 0.5351, + "step": 89900 + }, + { + "epoch": 0.9, + "learning_rate": 8.02736842105263e-05, + "loss": 0.5417, + "step": 89910 + }, + { + "epoch": 0.9, + "learning_rate": 8.019473684210526e-05, + "loss": 0.5542, + "step": 89920 + }, + { + "epoch": 0.9, + "learning_rate": 8.011578947368421e-05, + "loss": 0.5592, + "step": 89930 + }, + { + "epoch": 0.9, + "learning_rate": 8.003684210526314e-05, + "loss": 0.5733, + "step": 89940 + }, + { + "epoch": 0.9, + "learning_rate": 7.995789473684209e-05, + "loss": 0.571, + "step": 89950 + }, + { + "epoch": 0.9, + "learning_rate": 7.987894736842105e-05, + "loss": 0.5549, + "step": 89960 + }, + { + "epoch": 0.9, + "learning_rate": 7.98e-05, + "loss": 0.5582, + "step": 89970 + }, + { + "epoch": 0.9, + "learning_rate": 7.972105263157894e-05, + "loss": 0.564, + "step": 89980 + }, + { + "epoch": 0.9, + "learning_rate": 7.964210526315788e-05, + "loss": 0.565, + "step": 89990 + }, + { + "epoch": 0.9, + "learning_rate": 7.956315789473683e-05, + "loss": 0.5724, + "step": 90000 + }, + { + "epoch": 0.9, + "eval_accuracy": 0.8820988856910535, + "eval_loss": 0.537109375, + "eval_runtime": 97.2134, + "eval_samples_per_second": 822.932, + "eval_steps_per_second": 1.615, + "step": 90000 + }, + { + "epoch": 0.9, + "learning_rate": 7.94842105263158e-05, + "loss": 0.5633, + "step": 90010 + }, + { + "epoch": 0.9, + "learning_rate": 7.940526315789473e-05, + "loss": 0.555, + "step": 90020 + }, + { + "epoch": 0.9, + "learning_rate": 7.932631578947368e-05, + "loss": 0.5656, + "step": 90030 + }, + { + "epoch": 0.9, + "learning_rate": 7.924736842105263e-05, + "loss": 0.5636, + "step": 90040 + }, + { + "epoch": 0.9, + "learning_rate": 7.916842105263156e-05, + "loss": 0.5523, + "step": 90050 + }, + { + "epoch": 0.9, + "learning_rate": 7.908947368421052e-05, + "loss": 0.5567, + "step": 90060 + }, + { + "epoch": 0.9, + "learning_rate": 7.901052631578947e-05, + "loss": 0.5561, + "step": 90070 + }, + { + "epoch": 0.9, + "learning_rate": 7.89315789473684e-05, + "loss": 0.5568, + "step": 90080 + }, + { + "epoch": 0.9, + "learning_rate": 7.885263157894735e-05, + "loss": 0.5597, + "step": 90090 + }, + { + "epoch": 0.9, + "learning_rate": 7.878157894736842e-05, + "loss": 0.5579, + "step": 90100 + }, + { + "epoch": 0.9, + "learning_rate": 7.870263157894735e-05, + "loss": 0.5622, + "step": 90110 + }, + { + "epoch": 0.9, + "learning_rate": 7.862368421052631e-05, + "loss": 0.5563, + "step": 90120 + }, + { + "epoch": 0.9, + "learning_rate": 7.854473684210526e-05, + "loss": 0.5649, + "step": 90130 + }, + { + "epoch": 0.9, + "learning_rate": 7.846578947368421e-05, + "loss": 0.5568, + "step": 90140 + }, + { + "epoch": 0.9, + "learning_rate": 7.838684210526314e-05, + "loss": 0.5674, + "step": 90150 + }, + { + "epoch": 0.9, + "learning_rate": 7.830789473684209e-05, + "loss": 0.5641, + "step": 90160 + }, + { + "epoch": 0.9, + "learning_rate": 7.822894736842105e-05, + "loss": 0.5598, + "step": 90170 + }, + { + "epoch": 0.9, + "learning_rate": 7.815e-05, + "loss": 0.567, + "step": 90180 + }, + { + "epoch": 0.9, + "learning_rate": 7.807105263157894e-05, + "loss": 0.5678, + "step": 90190 + }, + { + "epoch": 0.9, + "learning_rate": 7.799210526315789e-05, + "loss": 0.5584, + "step": 90200 + }, + { + "epoch": 0.9, + "learning_rate": 7.791315789473683e-05, + "loss": 0.566, + "step": 90210 + }, + { + "epoch": 0.9, + "learning_rate": 7.783421052631578e-05, + "loss": 0.5538, + "step": 90220 + }, + { + "epoch": 0.9, + "learning_rate": 7.775526315789473e-05, + "loss": 0.5639, + "step": 90230 + }, + { + "epoch": 0.9, + "learning_rate": 7.767631578947368e-05, + "loss": 0.5665, + "step": 90240 + }, + { + "epoch": 0.9, + "learning_rate": 7.759736842105261e-05, + "loss": 0.5691, + "step": 90250 + }, + { + "epoch": 0.9, + "learning_rate": 7.751842105263158e-05, + "loss": 0.5702, + "step": 90260 + }, + { + "epoch": 0.9, + "learning_rate": 7.743947368421052e-05, + "loss": 0.5608, + "step": 90270 + }, + { + "epoch": 0.9, + "learning_rate": 7.736052631578947e-05, + "loss": 0.5412, + "step": 90280 + }, + { + "epoch": 0.9, + "learning_rate": 7.72815789473684e-05, + "loss": 0.554, + "step": 90290 + }, + { + "epoch": 0.9, + "learning_rate": 7.720263157894735e-05, + "loss": 0.5615, + "step": 90300 + }, + { + "epoch": 0.9, + "learning_rate": 7.712368421052632e-05, + "loss": 0.5645, + "step": 90310 + }, + { + "epoch": 0.9, + "learning_rate": 7.704473684210526e-05, + "loss": 0.5545, + "step": 90320 + }, + { + "epoch": 0.9, + "learning_rate": 7.69657894736842e-05, + "loss": 0.5475, + "step": 90330 + }, + { + "epoch": 0.9, + "learning_rate": 7.688684210526315e-05, + "loss": 0.5529, + "step": 90340 + }, + { + "epoch": 0.9, + "learning_rate": 7.68078947368421e-05, + "loss": 0.5619, + "step": 90350 + }, + { + "epoch": 0.9, + "learning_rate": 7.672894736842104e-05, + "loss": 0.5584, + "step": 90360 + }, + { + "epoch": 0.9, + "learning_rate": 7.664999999999999e-05, + "loss": 0.5619, + "step": 90370 + }, + { + "epoch": 0.9, + "learning_rate": 7.657105263157894e-05, + "loss": 0.5522, + "step": 90380 + }, + { + "epoch": 0.9, + "learning_rate": 7.649210526315788e-05, + "loss": 0.5677, + "step": 90390 + }, + { + "epoch": 0.9, + "learning_rate": 7.641315789473684e-05, + "loss": 0.5508, + "step": 90400 + }, + { + "epoch": 0.9, + "learning_rate": 7.633421052631579e-05, + "loss": 0.5749, + "step": 90410 + }, + { + "epoch": 0.9, + "learning_rate": 7.625526315789473e-05, + "loss": 0.5545, + "step": 90420 + }, + { + "epoch": 0.9, + "learning_rate": 7.617631578947367e-05, + "loss": 0.5567, + "step": 90430 + }, + { + "epoch": 0.9, + "learning_rate": 7.609736842105262e-05, + "loss": 0.5543, + "step": 90440 + }, + { + "epoch": 0.9, + "learning_rate": 7.601842105263158e-05, + "loss": 0.5609, + "step": 90450 + }, + { + "epoch": 0.9, + "learning_rate": 7.593947368421053e-05, + "loss": 0.5603, + "step": 90460 + }, + { + "epoch": 0.9, + "learning_rate": 7.586052631578946e-05, + "loss": 0.5612, + "step": 90470 + }, + { + "epoch": 0.9, + "learning_rate": 7.578157894736841e-05, + "loss": 0.5648, + "step": 90480 + }, + { + "epoch": 0.9, + "learning_rate": 7.570263157894736e-05, + "loss": 0.5653, + "step": 90490 + }, + { + "epoch": 0.91, + "learning_rate": 7.562368421052632e-05, + "loss": 0.5616, + "step": 90500 + }, + { + "epoch": 0.91, + "learning_rate": 7.554473684210526e-05, + "loss": 0.5541, + "step": 90510 + }, + { + "epoch": 0.91, + "learning_rate": 7.54657894736842e-05, + "loss": 0.563, + "step": 90520 + }, + { + "epoch": 0.91, + "learning_rate": 7.538684210526315e-05, + "loss": 0.5585, + "step": 90530 + }, + { + "epoch": 0.91, + "learning_rate": 7.53078947368421e-05, + "loss": 0.544, + "step": 90540 + }, + { + "epoch": 0.91, + "learning_rate": 7.522894736842105e-05, + "loss": 0.5487, + "step": 90550 + }, + { + "epoch": 0.91, + "learning_rate": 7.515e-05, + "loss": 0.5548, + "step": 90560 + }, + { + "epoch": 0.91, + "learning_rate": 7.507105263157893e-05, + "loss": 0.5547, + "step": 90570 + }, + { + "epoch": 0.91, + "learning_rate": 7.49921052631579e-05, + "loss": 0.5555, + "step": 90580 + }, + { + "epoch": 0.91, + "learning_rate": 7.491315789473683e-05, + "loss": 0.5652, + "step": 90590 + }, + { + "epoch": 0.91, + "learning_rate": 7.483421052631579e-05, + "loss": 0.5609, + "step": 90600 + }, + { + "epoch": 0.91, + "learning_rate": 7.475526315789473e-05, + "loss": 0.5549, + "step": 90610 + }, + { + "epoch": 0.91, + "learning_rate": 7.467631578947369e-05, + "loss": 0.5575, + "step": 90620 + }, + { + "epoch": 0.91, + "learning_rate": 7.459736842105262e-05, + "loss": 0.5562, + "step": 90630 + }, + { + "epoch": 0.91, + "learning_rate": 7.451842105263157e-05, + "loss": 0.5562, + "step": 90640 + }, + { + "epoch": 0.91, + "learning_rate": 7.443947368421052e-05, + "loss": 0.5541, + "step": 90650 + }, + { + "epoch": 0.91, + "learning_rate": 7.436052631578947e-05, + "loss": 0.5454, + "step": 90660 + }, + { + "epoch": 0.91, + "learning_rate": 7.428157894736841e-05, + "loss": 0.5686, + "step": 90670 + }, + { + "epoch": 0.91, + "learning_rate": 7.420263157894736e-05, + "loss": 0.5682, + "step": 90680 + }, + { + "epoch": 0.91, + "learning_rate": 7.412368421052631e-05, + "loss": 0.5579, + "step": 90690 + }, + { + "epoch": 0.91, + "learning_rate": 7.404473684210526e-05, + "loss": 0.562, + "step": 90700 + }, + { + "epoch": 0.91, + "learning_rate": 7.39657894736842e-05, + "loss": 0.5628, + "step": 90710 + }, + { + "epoch": 0.91, + "learning_rate": 7.388684210526316e-05, + "loss": 0.5559, + "step": 90720 + }, + { + "epoch": 0.91, + "learning_rate": 7.380789473684209e-05, + "loss": 0.5576, + "step": 90730 + }, + { + "epoch": 0.91, + "learning_rate": 7.372894736842105e-05, + "loss": 0.5532, + "step": 90740 + }, + { + "epoch": 0.91, + "learning_rate": 7.364999999999999e-05, + "loss": 0.5491, + "step": 90750 + }, + { + "epoch": 0.91, + "learning_rate": 7.357105263157895e-05, + "loss": 0.5621, + "step": 90760 + }, + { + "epoch": 0.91, + "learning_rate": 7.349210526315788e-05, + "loss": 0.5491, + "step": 90770 + }, + { + "epoch": 0.91, + "learning_rate": 7.341315789473683e-05, + "loss": 0.5474, + "step": 90780 + }, + { + "epoch": 0.91, + "learning_rate": 7.333421052631578e-05, + "loss": 0.561, + "step": 90790 + }, + { + "epoch": 0.91, + "learning_rate": 7.325526315789473e-05, + "loss": 0.5569, + "step": 90800 + }, + { + "epoch": 0.91, + "learning_rate": 7.317631578947368e-05, + "loss": 0.5555, + "step": 90810 + }, + { + "epoch": 0.91, + "learning_rate": 7.309736842105263e-05, + "loss": 0.5637, + "step": 90820 + }, + { + "epoch": 0.91, + "learning_rate": 7.301842105263157e-05, + "loss": 0.5755, + "step": 90830 + }, + { + "epoch": 0.91, + "learning_rate": 7.293947368421052e-05, + "loss": 0.5626, + "step": 90840 + }, + { + "epoch": 0.91, + "learning_rate": 7.286052631578946e-05, + "loss": 0.5753, + "step": 90850 + }, + { + "epoch": 0.91, + "learning_rate": 7.278157894736842e-05, + "loss": 0.5672, + "step": 90860 + }, + { + "epoch": 0.91, + "learning_rate": 7.270263157894735e-05, + "loss": 0.5644, + "step": 90870 + }, + { + "epoch": 0.91, + "learning_rate": 7.262368421052632e-05, + "loss": 0.5543, + "step": 90880 + }, + { + "epoch": 0.91, + "learning_rate": 7.254473684210525e-05, + "loss": 0.5625, + "step": 90890 + }, + { + "epoch": 0.91, + "learning_rate": 7.246578947368421e-05, + "loss": 0.5621, + "step": 90900 + }, + { + "epoch": 0.91, + "learning_rate": 7.238684210526315e-05, + "loss": 0.565, + "step": 90910 + }, + { + "epoch": 0.91, + "learning_rate": 7.23078947368421e-05, + "loss": 0.5587, + "step": 90920 + }, + { + "epoch": 0.91, + "learning_rate": 7.222894736842104e-05, + "loss": 0.5577, + "step": 90930 + }, + { + "epoch": 0.91, + "learning_rate": 7.214999999999999e-05, + "loss": 0.5575, + "step": 90940 + }, + { + "epoch": 0.91, + "learning_rate": 7.207105263157894e-05, + "loss": 0.5588, + "step": 90950 + }, + { + "epoch": 0.91, + "learning_rate": 7.199210526315789e-05, + "loss": 0.5574, + "step": 90960 + }, + { + "epoch": 0.91, + "learning_rate": 7.191315789473684e-05, + "loss": 0.5623, + "step": 90970 + }, + { + "epoch": 0.91, + "learning_rate": 7.183421052631579e-05, + "loss": 0.5494, + "step": 90980 + }, + { + "epoch": 0.91, + "learning_rate": 7.175526315789473e-05, + "loss": 0.5557, + "step": 90990 + }, + { + "epoch": 0.91, + "learning_rate": 7.167631578947368e-05, + "loss": 0.5554, + "step": 91000 + }, + { + "epoch": 0.91, + "learning_rate": 7.159736842105262e-05, + "loss": 0.5635, + "step": 91010 + }, + { + "epoch": 0.91, + "learning_rate": 7.151842105263158e-05, + "loss": 0.5585, + "step": 91020 + }, + { + "epoch": 0.91, + "learning_rate": 7.143947368421051e-05, + "loss": 0.5526, + "step": 91030 + }, + { + "epoch": 0.91, + "learning_rate": 7.136052631578947e-05, + "loss": 0.5539, + "step": 91040 + }, + { + "epoch": 0.91, + "learning_rate": 7.128157894736841e-05, + "loss": 0.5575, + "step": 91050 + }, + { + "epoch": 0.91, + "learning_rate": 7.120263157894736e-05, + "loss": 0.5483, + "step": 91060 + }, + { + "epoch": 0.91, + "learning_rate": 7.11236842105263e-05, + "loss": 0.5647, + "step": 91070 + }, + { + "epoch": 0.91, + "learning_rate": 7.104473684210525e-05, + "loss": 0.5564, + "step": 91080 + }, + { + "epoch": 0.91, + "learning_rate": 7.09657894736842e-05, + "loss": 0.5727, + "step": 91090 + }, + { + "epoch": 0.91, + "learning_rate": 7.089473684210525e-05, + "loss": 0.5502, + "step": 91100 + }, + { + "epoch": 0.91, + "learning_rate": 7.081578947368421e-05, + "loss": 0.5648, + "step": 91110 + }, + { + "epoch": 0.91, + "learning_rate": 7.073684210526315e-05, + "loss": 0.5551, + "step": 91120 + }, + { + "epoch": 0.91, + "learning_rate": 7.06578947368421e-05, + "loss": 0.5649, + "step": 91130 + }, + { + "epoch": 0.91, + "learning_rate": 7.057894736842104e-05, + "loss": 0.5595, + "step": 91140 + }, + { + "epoch": 0.91, + "learning_rate": 7.049999999999999e-05, + "loss": 0.5592, + "step": 91150 + }, + { + "epoch": 0.91, + "learning_rate": 7.042105263157894e-05, + "loss": 0.5569, + "step": 91160 + }, + { + "epoch": 0.91, + "learning_rate": 7.034210526315789e-05, + "loss": 0.5577, + "step": 91170 + }, + { + "epoch": 0.91, + "learning_rate": 7.026315789473684e-05, + "loss": 0.567, + "step": 91180 + }, + { + "epoch": 0.91, + "learning_rate": 7.018421052631579e-05, + "loss": 0.5574, + "step": 91190 + }, + { + "epoch": 0.91, + "learning_rate": 7.010526315789473e-05, + "loss": 0.5472, + "step": 91200 + }, + { + "epoch": 0.91, + "learning_rate": 7.002631578947368e-05, + "loss": 0.5459, + "step": 91210 + }, + { + "epoch": 0.91, + "learning_rate": 6.994736842105262e-05, + "loss": 0.5495, + "step": 91220 + }, + { + "epoch": 0.91, + "learning_rate": 6.986842105263158e-05, + "loss": 0.5771, + "step": 91230 + }, + { + "epoch": 0.91, + "learning_rate": 6.978947368421051e-05, + "loss": 0.5726, + "step": 91240 + }, + { + "epoch": 0.91, + "learning_rate": 6.971052631578948e-05, + "loss": 0.5685, + "step": 91250 + }, + { + "epoch": 0.91, + "learning_rate": 6.963157894736841e-05, + "loss": 0.568, + "step": 91260 + }, + { + "epoch": 0.91, + "learning_rate": 6.955263157894737e-05, + "loss": 0.5633, + "step": 91270 + }, + { + "epoch": 0.91, + "learning_rate": 6.947368421052631e-05, + "loss": 0.544, + "step": 91280 + }, + { + "epoch": 0.91, + "learning_rate": 6.939473684210526e-05, + "loss": 0.5422, + "step": 91290 + }, + { + "epoch": 0.91, + "learning_rate": 6.93157894736842e-05, + "loss": 0.5397, + "step": 91300 + }, + { + "epoch": 0.91, + "learning_rate": 6.923684210526315e-05, + "loss": 0.565, + "step": 91310 + }, + { + "epoch": 0.91, + "learning_rate": 6.91578947368421e-05, + "loss": 0.5302, + "step": 91320 + }, + { + "epoch": 0.91, + "learning_rate": 6.907894736842105e-05, + "loss": 0.5405, + "step": 91330 + }, + { + "epoch": 0.91, + "learning_rate": 6.9e-05, + "loss": 0.5276, + "step": 91340 + }, + { + "epoch": 0.91, + "learning_rate": 6.892105263157894e-05, + "loss": 0.5401, + "step": 91350 + }, + { + "epoch": 0.91, + "learning_rate": 6.884210526315788e-05, + "loss": 0.5361, + "step": 91360 + }, + { + "epoch": 0.91, + "learning_rate": 6.876315789473684e-05, + "loss": 0.5328, + "step": 91370 + }, + { + "epoch": 0.91, + "learning_rate": 6.868421052631578e-05, + "loss": 0.5281, + "step": 91380 + }, + { + "epoch": 0.91, + "learning_rate": 6.860526315789474e-05, + "loss": 0.5363, + "step": 91390 + }, + { + "epoch": 0.91, + "learning_rate": 6.852631578947367e-05, + "loss": 0.5341, + "step": 91400 + }, + { + "epoch": 0.91, + "learning_rate": 6.844736842105263e-05, + "loss": 0.545, + "step": 91410 + }, + { + "epoch": 0.91, + "learning_rate": 6.836842105263157e-05, + "loss": 0.5369, + "step": 91420 + }, + { + "epoch": 0.91, + "learning_rate": 6.828947368421052e-05, + "loss": 0.5564, + "step": 91430 + }, + { + "epoch": 0.91, + "learning_rate": 6.821052631578947e-05, + "loss": 0.5442, + "step": 91440 + }, + { + "epoch": 0.91, + "learning_rate": 6.813157894736841e-05, + "loss": 0.5697, + "step": 91450 + }, + { + "epoch": 0.91, + "learning_rate": 6.805263157894736e-05, + "loss": 0.5594, + "step": 91460 + }, + { + "epoch": 0.91, + "learning_rate": 6.797368421052631e-05, + "loss": 0.5676, + "step": 91470 + }, + { + "epoch": 0.91, + "learning_rate": 6.789473684210526e-05, + "loss": 0.561, + "step": 91480 + }, + { + "epoch": 0.91, + "learning_rate": 6.781578947368421e-05, + "loss": 0.5583, + "step": 91490 + }, + { + "epoch": 0.92, + "learning_rate": 6.773684210526316e-05, + "loss": 0.5535, + "step": 91500 + }, + { + "epoch": 0.92, + "learning_rate": 6.76578947368421e-05, + "loss": 0.5649, + "step": 91510 + }, + { + "epoch": 0.92, + "learning_rate": 6.757894736842104e-05, + "loss": 0.559, + "step": 91520 + }, + { + "epoch": 0.92, + "learning_rate": 6.75e-05, + "loss": 0.5572, + "step": 91530 + }, + { + "epoch": 0.92, + "learning_rate": 6.742105263157894e-05, + "loss": 0.5607, + "step": 91540 + }, + { + "epoch": 0.92, + "learning_rate": 6.73421052631579e-05, + "loss": 0.5493, + "step": 91550 + }, + { + "epoch": 0.92, + "learning_rate": 6.726315789473683e-05, + "loss": 0.5643, + "step": 91560 + }, + { + "epoch": 0.92, + "learning_rate": 6.71842105263158e-05, + "loss": 0.5615, + "step": 91570 + }, + { + "epoch": 0.92, + "learning_rate": 6.710526315789473e-05, + "loss": 0.5482, + "step": 91580 + }, + { + "epoch": 0.92, + "learning_rate": 6.702631578947368e-05, + "loss": 0.5503, + "step": 91590 + }, + { + "epoch": 0.92, + "learning_rate": 6.694736842105263e-05, + "loss": 0.5373, + "step": 91600 + }, + { + "epoch": 0.92, + "learning_rate": 6.686842105263157e-05, + "loss": 0.538, + "step": 91610 + }, + { + "epoch": 0.92, + "learning_rate": 6.678947368421052e-05, + "loss": 0.541, + "step": 91620 + }, + { + "epoch": 0.92, + "learning_rate": 6.671052631578947e-05, + "loss": 0.5373, + "step": 91630 + }, + { + "epoch": 0.92, + "learning_rate": 6.663157894736842e-05, + "loss": 0.5468, + "step": 91640 + }, + { + "epoch": 0.92, + "learning_rate": 6.655263157894737e-05, + "loss": 0.5584, + "step": 91650 + }, + { + "epoch": 0.92, + "learning_rate": 6.64736842105263e-05, + "loss": 0.5783, + "step": 91660 + }, + { + "epoch": 0.92, + "learning_rate": 6.639473684210526e-05, + "loss": 0.5676, + "step": 91670 + }, + { + "epoch": 0.92, + "learning_rate": 6.63157894736842e-05, + "loss": 0.5649, + "step": 91680 + }, + { + "epoch": 0.92, + "learning_rate": 6.623684210526316e-05, + "loss": 0.5692, + "step": 91690 + }, + { + "epoch": 0.92, + "learning_rate": 6.61578947368421e-05, + "loss": 0.573, + "step": 91700 + }, + { + "epoch": 0.92, + "learning_rate": 6.607894736842106e-05, + "loss": 0.5864, + "step": 91710 + }, + { + "epoch": 0.92, + "learning_rate": 6.599999999999999e-05, + "loss": 0.5886, + "step": 91720 + }, + { + "epoch": 0.92, + "learning_rate": 6.592105263157894e-05, + "loss": 0.5783, + "step": 91730 + }, + { + "epoch": 0.92, + "learning_rate": 6.584210526315789e-05, + "loss": 0.5594, + "step": 91740 + }, + { + "epoch": 0.92, + "learning_rate": 6.576315789473684e-05, + "loss": 0.5523, + "step": 91750 + }, + { + "epoch": 0.92, + "learning_rate": 6.568421052631578e-05, + "loss": 0.5319, + "step": 91760 + }, + { + "epoch": 0.92, + "learning_rate": 6.560526315789473e-05, + "loss": 0.5527, + "step": 91770 + }, + { + "epoch": 0.92, + "learning_rate": 6.552631578947368e-05, + "loss": 0.545, + "step": 91780 + }, + { + "epoch": 0.92, + "learning_rate": 6.544736842105263e-05, + "loss": 0.5667, + "step": 91790 + }, + { + "epoch": 0.92, + "learning_rate": 6.536842105263156e-05, + "loss": 0.5477, + "step": 91800 + }, + { + "epoch": 0.92, + "learning_rate": 6.528947368421053e-05, + "loss": 0.5548, + "step": 91810 + }, + { + "epoch": 0.92, + "learning_rate": 6.521052631578946e-05, + "loss": 0.5453, + "step": 91820 + }, + { + "epoch": 0.92, + "learning_rate": 6.513157894736842e-05, + "loss": 0.5416, + "step": 91830 + }, + { + "epoch": 0.92, + "learning_rate": 6.505263157894736e-05, + "loss": 0.5347, + "step": 91840 + }, + { + "epoch": 0.92, + "learning_rate": 6.497368421052632e-05, + "loss": 0.5378, + "step": 91850 + }, + { + "epoch": 0.92, + "learning_rate": 6.489473684210525e-05, + "loss": 0.5349, + "step": 91860 + }, + { + "epoch": 0.92, + "learning_rate": 6.48157894736842e-05, + "loss": 0.5472, + "step": 91870 + }, + { + "epoch": 0.92, + "learning_rate": 6.473684210526315e-05, + "loss": 0.5486, + "step": 91880 + }, + { + "epoch": 0.92, + "learning_rate": 6.46578947368421e-05, + "loss": 0.5664, + "step": 91890 + }, + { + "epoch": 0.92, + "learning_rate": 6.457894736842105e-05, + "loss": 0.5623, + "step": 91900 + }, + { + "epoch": 0.92, + "learning_rate": 6.45e-05, + "loss": 0.5685, + "step": 91910 + }, + { + "epoch": 0.92, + "learning_rate": 6.442105263157894e-05, + "loss": 0.5764, + "step": 91920 + }, + { + "epoch": 0.92, + "learning_rate": 6.434210526315789e-05, + "loss": 0.5662, + "step": 91930 + }, + { + "epoch": 0.92, + "learning_rate": 6.426315789473683e-05, + "loss": 0.5745, + "step": 91940 + }, + { + "epoch": 0.92, + "learning_rate": 6.418421052631579e-05, + "loss": 0.5882, + "step": 91950 + }, + { + "epoch": 0.92, + "learning_rate": 6.410526315789472e-05, + "loss": 0.5799, + "step": 91960 + }, + { + "epoch": 0.92, + "learning_rate": 6.402631578947369e-05, + "loss": 0.5643, + "step": 91970 + }, + { + "epoch": 0.92, + "learning_rate": 6.394736842105262e-05, + "loss": 0.5662, + "step": 91980 + }, + { + "epoch": 0.92, + "learning_rate": 6.386842105263158e-05, + "loss": 0.5588, + "step": 91990 + }, + { + "epoch": 0.92, + "learning_rate": 6.378947368421052e-05, + "loss": 0.5582, + "step": 92000 + }, + { + "epoch": 0.92, + "learning_rate": 6.371052631578947e-05, + "loss": 0.5585, + "step": 92010 + }, + { + "epoch": 0.92, + "learning_rate": 6.363157894736841e-05, + "loss": 0.5563, + "step": 92020 + }, + { + "epoch": 0.92, + "learning_rate": 6.355263157894736e-05, + "loss": 0.5533, + "step": 92030 + }, + { + "epoch": 0.92, + "learning_rate": 6.347368421052631e-05, + "loss": 0.559, + "step": 92040 + }, + { + "epoch": 0.92, + "learning_rate": 6.339473684210526e-05, + "loss": 0.5579, + "step": 92050 + }, + { + "epoch": 0.92, + "learning_rate": 6.33157894736842e-05, + "loss": 0.5512, + "step": 92060 + }, + { + "epoch": 0.92, + "learning_rate": 6.323684210526315e-05, + "loss": 0.5438, + "step": 92070 + }, + { + "epoch": 0.92, + "learning_rate": 6.315789473684209e-05, + "loss": 0.539, + "step": 92080 + }, + { + "epoch": 0.92, + "learning_rate": 6.307894736842105e-05, + "loss": 0.548, + "step": 92090 + }, + { + "epoch": 0.92, + "learning_rate": 6.30078947368421e-05, + "loss": 0.5482, + "step": 92100 + }, + { + "epoch": 0.92, + "learning_rate": 6.292894736842105e-05, + "loss": 0.5486, + "step": 92110 + }, + { + "epoch": 0.92, + "learning_rate": 6.285e-05, + "loss": 0.5571, + "step": 92120 + }, + { + "epoch": 0.92, + "learning_rate": 6.277105263157894e-05, + "loss": 0.5558, + "step": 92130 + }, + { + "epoch": 0.92, + "learning_rate": 6.269210526315789e-05, + "loss": 0.5622, + "step": 92140 + }, + { + "epoch": 0.92, + "learning_rate": 6.261315789473684e-05, + "loss": 0.5607, + "step": 92150 + }, + { + "epoch": 0.92, + "learning_rate": 6.253421052631579e-05, + "loss": 0.5745, + "step": 92160 + }, + { + "epoch": 0.92, + "learning_rate": 6.245526315789472e-05, + "loss": 0.5646, + "step": 92170 + }, + { + "epoch": 0.92, + "learning_rate": 6.237631578947369e-05, + "loss": 0.5684, + "step": 92180 + }, + { + "epoch": 0.92, + "learning_rate": 6.229736842105262e-05, + "loss": 0.5744, + "step": 92190 + }, + { + "epoch": 0.92, + "learning_rate": 6.221842105263157e-05, + "loss": 0.5742, + "step": 92200 + }, + { + "epoch": 0.92, + "learning_rate": 6.213947368421052e-05, + "loss": 0.5668, + "step": 92210 + }, + { + "epoch": 0.92, + "learning_rate": 6.206052631578947e-05, + "loss": 0.5487, + "step": 92220 + }, + { + "epoch": 0.92, + "learning_rate": 6.198157894736841e-05, + "loss": 0.5567, + "step": 92230 + }, + { + "epoch": 0.92, + "learning_rate": 6.190263157894736e-05, + "loss": 0.5486, + "step": 92240 + }, + { + "epoch": 0.92, + "learning_rate": 6.182368421052631e-05, + "loss": 0.5625, + "step": 92250 + }, + { + "epoch": 0.92, + "learning_rate": 6.174473684210526e-05, + "loss": 0.5562, + "step": 92260 + }, + { + "epoch": 0.92, + "learning_rate": 6.166578947368421e-05, + "loss": 0.549, + "step": 92270 + }, + { + "epoch": 0.92, + "learning_rate": 6.158684210526316e-05, + "loss": 0.5432, + "step": 92280 + }, + { + "epoch": 0.92, + "learning_rate": 6.15078947368421e-05, + "loss": 0.5462, + "step": 92290 + }, + { + "epoch": 0.92, + "learning_rate": 6.142894736842105e-05, + "loss": 0.5472, + "step": 92300 + }, + { + "epoch": 0.92, + "learning_rate": 6.134999999999999e-05, + "loss": 0.5388, + "step": 92310 + }, + { + "epoch": 0.92, + "learning_rate": 6.127105263157895e-05, + "loss": 0.535, + "step": 92320 + }, + { + "epoch": 0.92, + "learning_rate": 6.119210526315788e-05, + "loss": 0.5383, + "step": 92330 + }, + { + "epoch": 0.92, + "learning_rate": 6.111315789473685e-05, + "loss": 0.5431, + "step": 92340 + }, + { + "epoch": 0.92, + "learning_rate": 6.103421052631578e-05, + "loss": 0.5502, + "step": 92350 + }, + { + "epoch": 0.92, + "learning_rate": 6.0955263157894735e-05, + "loss": 0.5704, + "step": 92360 + }, + { + "epoch": 0.92, + "learning_rate": 6.087631578947368e-05, + "loss": 0.5768, + "step": 92370 + }, + { + "epoch": 0.92, + "learning_rate": 6.0797368421052625e-05, + "loss": 0.576, + "step": 92380 + }, + { + "epoch": 0.92, + "learning_rate": 6.071842105263157e-05, + "loss": 0.581, + "step": 92390 + }, + { + "epoch": 0.92, + "learning_rate": 6.063947368421052e-05, + "loss": 0.5734, + "step": 92400 + }, + { + "epoch": 0.92, + "learning_rate": 6.056052631578947e-05, + "loss": 0.5678, + "step": 92410 + }, + { + "epoch": 0.92, + "learning_rate": 6.048157894736841e-05, + "loss": 0.5702, + "step": 92420 + }, + { + "epoch": 0.92, + "learning_rate": 6.0402631578947367e-05, + "loss": 0.5698, + "step": 92430 + }, + { + "epoch": 0.92, + "learning_rate": 6.032368421052631e-05, + "loss": 0.579, + "step": 92440 + }, + { + "epoch": 0.92, + "learning_rate": 6.0244736842105256e-05, + "loss": 0.5601, + "step": 92450 + }, + { + "epoch": 0.92, + "learning_rate": 6.0165789473684205e-05, + "loss": 0.5581, + "step": 92460 + }, + { + "epoch": 0.92, + "learning_rate": 6.008684210526315e-05, + "loss": 0.562, + "step": 92470 + }, + { + "epoch": 0.92, + "learning_rate": 6.00078947368421e-05, + "loss": 0.5585, + "step": 92480 + }, + { + "epoch": 0.92, + "learning_rate": 5.992894736842104e-05, + "loss": 0.5743, + "step": 92490 + }, + { + "epoch": 0.93, + "learning_rate": 5.985e-05, + "loss": 0.5482, + "step": 92500 + }, + { + "epoch": 0.93, + "eval_accuracy": 0.8834968490752123, + "eval_loss": 0.52978515625, + "eval_runtime": 97.1569, + "eval_samples_per_second": 823.411, + "eval_steps_per_second": 1.616, + "step": 92500 + }, + { + "epoch": 0.93, + "learning_rate": 5.977105263157894e-05, + "loss": 0.5555, + "step": 92510 + }, + { + "epoch": 0.93, + "learning_rate": 5.969210526315789e-05, + "loss": 0.5454, + "step": 92520 + }, + { + "epoch": 0.93, + "learning_rate": 5.9613157894736836e-05, + "loss": 0.5387, + "step": 92530 + }, + { + "epoch": 0.93, + "learning_rate": 5.9534210526315784e-05, + "loss": 0.5603, + "step": 92540 + }, + { + "epoch": 0.93, + "learning_rate": 5.945526315789473e-05, + "loss": 0.5328, + "step": 92550 + }, + { + "epoch": 0.93, + "learning_rate": 5.9376315789473674e-05, + "loss": 0.5441, + "step": 92560 + }, + { + "epoch": 0.93, + "learning_rate": 5.929736842105263e-05, + "loss": 0.557, + "step": 92570 + }, + { + "epoch": 0.93, + "learning_rate": 5.921842105263157e-05, + "loss": 0.5579, + "step": 92580 + }, + { + "epoch": 0.93, + "learning_rate": 5.913947368421052e-05, + "loss": 0.5615, + "step": 92590 + }, + { + "epoch": 0.93, + "learning_rate": 5.906052631578947e-05, + "loss": 0.5585, + "step": 92600 + }, + { + "epoch": 0.93, + "learning_rate": 5.8981578947368416e-05, + "loss": 0.5563, + "step": 92610 + }, + { + "epoch": 0.93, + "learning_rate": 5.8902631578947364e-05, + "loss": 0.5681, + "step": 92620 + }, + { + "epoch": 0.93, + "learning_rate": 5.882368421052631e-05, + "loss": 0.5568, + "step": 92630 + }, + { + "epoch": 0.93, + "learning_rate": 5.874473684210526e-05, + "loss": 0.5636, + "step": 92640 + }, + { + "epoch": 0.93, + "learning_rate": 5.86657894736842e-05, + "loss": 0.552, + "step": 92650 + }, + { + "epoch": 0.93, + "learning_rate": 5.858684210526315e-05, + "loss": 0.5579, + "step": 92660 + }, + { + "epoch": 0.93, + "learning_rate": 5.85078947368421e-05, + "loss": 0.5406, + "step": 92670 + }, + { + "epoch": 0.93, + "learning_rate": 5.842894736842105e-05, + "loss": 0.5514, + "step": 92680 + }, + { + "epoch": 0.93, + "learning_rate": 5.8349999999999995e-05, + "loss": 0.5416, + "step": 92690 + }, + { + "epoch": 0.93, + "learning_rate": 5.8271052631578944e-05, + "loss": 0.5424, + "step": 92700 + }, + { + "epoch": 0.93, + "learning_rate": 5.819210526315789e-05, + "loss": 0.5331, + "step": 92710 + }, + { + "epoch": 0.93, + "learning_rate": 5.8113157894736833e-05, + "loss": 0.545, + "step": 92720 + }, + { + "epoch": 0.93, + "learning_rate": 5.803421052631578e-05, + "loss": 0.5257, + "step": 92730 + }, + { + "epoch": 0.93, + "learning_rate": 5.795526315789473e-05, + "loss": 0.5363, + "step": 92740 + }, + { + "epoch": 0.93, + "learning_rate": 5.787631578947368e-05, + "loss": 0.5345, + "step": 92750 + }, + { + "epoch": 0.93, + "learning_rate": 5.779736842105263e-05, + "loss": 0.5373, + "step": 92760 + }, + { + "epoch": 0.93, + "learning_rate": 5.7718421052631575e-05, + "loss": 0.5339, + "step": 92770 + }, + { + "epoch": 0.93, + "learning_rate": 5.763947368421052e-05, + "loss": 0.5269, + "step": 92780 + }, + { + "epoch": 0.93, + "learning_rate": 5.7560526315789465e-05, + "loss": 0.5483, + "step": 92790 + }, + { + "epoch": 0.93, + "learning_rate": 5.748157894736842e-05, + "loss": 0.5536, + "step": 92800 + }, + { + "epoch": 0.93, + "learning_rate": 5.740263157894736e-05, + "loss": 0.5591, + "step": 92810 + }, + { + "epoch": 0.93, + "learning_rate": 5.732368421052631e-05, + "loss": 0.5613, + "step": 92820 + }, + { + "epoch": 0.93, + "learning_rate": 5.724473684210526e-05, + "loss": 0.562, + "step": 92830 + }, + { + "epoch": 0.93, + "learning_rate": 5.7165789473684206e-05, + "loss": 0.5623, + "step": 92840 + }, + { + "epoch": 0.93, + "learning_rate": 5.7086842105263155e-05, + "loss": 0.5591, + "step": 92850 + }, + { + "epoch": 0.93, + "learning_rate": 5.70078947368421e-05, + "loss": 0.5741, + "step": 92860 + }, + { + "epoch": 0.93, + "learning_rate": 5.692894736842105e-05, + "loss": 0.5545, + "step": 92870 + }, + { + "epoch": 0.93, + "learning_rate": 5.684999999999999e-05, + "loss": 0.5488, + "step": 92880 + }, + { + "epoch": 0.93, + "learning_rate": 5.677105263157894e-05, + "loss": 0.5598, + "step": 92890 + }, + { + "epoch": 0.93, + "learning_rate": 5.669210526315789e-05, + "loss": 0.5502, + "step": 92900 + }, + { + "epoch": 0.93, + "learning_rate": 5.661315789473684e-05, + "loss": 0.5513, + "step": 92910 + }, + { + "epoch": 0.93, + "learning_rate": 5.6534210526315786e-05, + "loss": 0.5383, + "step": 92920 + }, + { + "epoch": 0.93, + "learning_rate": 5.6455263157894734e-05, + "loss": 0.5458, + "step": 92930 + }, + { + "epoch": 0.93, + "learning_rate": 5.637631578947368e-05, + "loss": 0.536, + "step": 92940 + }, + { + "epoch": 0.93, + "learning_rate": 5.6297368421052624e-05, + "loss": 0.5503, + "step": 92950 + }, + { + "epoch": 0.93, + "learning_rate": 5.621842105263157e-05, + "loss": 0.5452, + "step": 92960 + }, + { + "epoch": 0.93, + "learning_rate": 5.613947368421052e-05, + "loss": 0.5285, + "step": 92970 + }, + { + "epoch": 0.93, + "learning_rate": 5.606052631578947e-05, + "loss": 0.5303, + "step": 92980 + }, + { + "epoch": 0.93, + "learning_rate": 5.598157894736842e-05, + "loss": 0.5272, + "step": 92990 + }, + { + "epoch": 0.93, + "learning_rate": 5.5902631578947366e-05, + "loss": 0.5252, + "step": 93000 + }, + { + "epoch": 0.93, + "learning_rate": 5.5823684210526314e-05, + "loss": 0.5261, + "step": 93010 + }, + { + "epoch": 0.93, + "learning_rate": 5.5744736842105255e-05, + "loss": 0.5248, + "step": 93020 + }, + { + "epoch": 0.93, + "learning_rate": 5.5665789473684204e-05, + "loss": 0.53, + "step": 93030 + }, + { + "epoch": 0.93, + "learning_rate": 5.558684210526315e-05, + "loss": 0.5336, + "step": 93040 + }, + { + "epoch": 0.93, + "learning_rate": 5.55078947368421e-05, + "loss": 0.5329, + "step": 93050 + }, + { + "epoch": 0.93, + "learning_rate": 5.542894736842105e-05, + "loss": 0.5422, + "step": 93060 + }, + { + "epoch": 0.93, + "learning_rate": 5.535e-05, + "loss": 0.5743, + "step": 93070 + }, + { + "epoch": 0.93, + "learning_rate": 5.5271052631578945e-05, + "loss": 0.5587, + "step": 93080 + }, + { + "epoch": 0.93, + "learning_rate": 5.5192105263157894e-05, + "loss": 0.5608, + "step": 93090 + }, + { + "epoch": 0.93, + "learning_rate": 5.5113157894736835e-05, + "loss": 0.5682, + "step": 93100 + }, + { + "epoch": 0.93, + "learning_rate": 5.504210526315789e-05, + "loss": 0.5697, + "step": 93110 + }, + { + "epoch": 0.93, + "learning_rate": 5.496315789473683e-05, + "loss": 0.567, + "step": 93120 + }, + { + "epoch": 0.93, + "learning_rate": 5.4884210526315786e-05, + "loss": 0.5714, + "step": 93130 + }, + { + "epoch": 0.93, + "learning_rate": 5.480526315789473e-05, + "loss": 0.5616, + "step": 93140 + }, + { + "epoch": 0.93, + "learning_rate": 5.472631578947368e-05, + "loss": 0.5684, + "step": 93150 + }, + { + "epoch": 0.93, + "learning_rate": 5.4647368421052625e-05, + "loss": 0.559, + "step": 93160 + }, + { + "epoch": 0.93, + "learning_rate": 5.456842105263158e-05, + "loss": 0.5566, + "step": 93170 + }, + { + "epoch": 0.93, + "learning_rate": 5.448947368421052e-05, + "loss": 0.5645, + "step": 93180 + }, + { + "epoch": 0.93, + "learning_rate": 5.441052631578946e-05, + "loss": 0.5635, + "step": 93190 + }, + { + "epoch": 0.93, + "learning_rate": 5.433157894736842e-05, + "loss": 0.5472, + "step": 93200 + }, + { + "epoch": 0.93, + "learning_rate": 5.425263157894736e-05, + "loss": 0.5437, + "step": 93210 + }, + { + "epoch": 0.93, + "learning_rate": 5.4173684210526314e-05, + "loss": 0.5467, + "step": 93220 + }, + { + "epoch": 0.93, + "learning_rate": 5.4094736842105256e-05, + "loss": 0.5473, + "step": 93230 + }, + { + "epoch": 0.93, + "learning_rate": 5.401578947368421e-05, + "loss": 0.5332, + "step": 93240 + }, + { + "epoch": 0.93, + "learning_rate": 5.393684210526315e-05, + "loss": 0.5336, + "step": 93250 + }, + { + "epoch": 0.93, + "learning_rate": 5.3857894736842094e-05, + "loss": 0.5374, + "step": 93260 + }, + { + "epoch": 0.93, + "learning_rate": 5.377894736842105e-05, + "loss": 0.5484, + "step": 93270 + }, + { + "epoch": 0.93, + "learning_rate": 5.369999999999999e-05, + "loss": 0.5483, + "step": 93280 + }, + { + "epoch": 0.93, + "learning_rate": 5.3621052631578946e-05, + "loss": 0.5549, + "step": 93290 + }, + { + "epoch": 0.93, + "learning_rate": 5.354210526315789e-05, + "loss": 0.5658, + "step": 93300 + }, + { + "epoch": 0.93, + "learning_rate": 5.346315789473684e-05, + "loss": 0.5748, + "step": 93310 + }, + { + "epoch": 0.93, + "learning_rate": 5.3384210526315784e-05, + "loss": 0.5751, + "step": 93320 + }, + { + "epoch": 0.93, + "learning_rate": 5.3305263157894725e-05, + "loss": 0.5793, + "step": 93330 + }, + { + "epoch": 0.93, + "learning_rate": 5.322631578947368e-05, + "loss": 0.5721, + "step": 93340 + }, + { + "epoch": 0.93, + "learning_rate": 5.314736842105262e-05, + "loss": 0.576, + "step": 93350 + }, + { + "epoch": 0.93, + "learning_rate": 5.306842105263158e-05, + "loss": 0.5655, + "step": 93360 + }, + { + "epoch": 0.93, + "learning_rate": 5.298947368421052e-05, + "loss": 0.5695, + "step": 93370 + }, + { + "epoch": 0.93, + "learning_rate": 5.2910526315789474e-05, + "loss": 0.5617, + "step": 93380 + }, + { + "epoch": 0.93, + "learning_rate": 5.2831578947368415e-05, + "loss": 0.5572, + "step": 93390 + }, + { + "epoch": 0.93, + "learning_rate": 5.2752631578947364e-05, + "loss": 0.5574, + "step": 93400 + }, + { + "epoch": 0.93, + "learning_rate": 5.267368421052631e-05, + "loss": 0.5526, + "step": 93410 + }, + { + "epoch": 0.93, + "learning_rate": 5.259473684210525e-05, + "loss": 0.5534, + "step": 93420 + }, + { + "epoch": 0.93, + "learning_rate": 5.251578947368421e-05, + "loss": 0.551, + "step": 93430 + }, + { + "epoch": 0.93, + "learning_rate": 5.243684210526315e-05, + "loss": 0.5488, + "step": 93440 + }, + { + "epoch": 0.93, + "learning_rate": 5.2357894736842105e-05, + "loss": 0.5515, + "step": 93450 + }, + { + "epoch": 0.93, + "learning_rate": 5.2278947368421047e-05, + "loss": 0.5414, + "step": 93460 + }, + { + "epoch": 0.93, + "learning_rate": 5.2199999999999995e-05, + "loss": 0.5343, + "step": 93470 + }, + { + "epoch": 0.93, + "learning_rate": 5.212105263157894e-05, + "loss": 0.5388, + "step": 93480 + }, + { + "epoch": 0.93, + "learning_rate": 5.2042105263157885e-05, + "loss": 0.5437, + "step": 93490 + }, + { + "epoch": 0.94, + "learning_rate": 5.196315789473684e-05, + "loss": 0.5417, + "step": 93500 + }, + { + "epoch": 0.94, + "learning_rate": 5.188421052631578e-05, + "loss": 0.5515, + "step": 93510 + }, + { + "epoch": 0.94, + "learning_rate": 5.1805263157894736e-05, + "loss": 0.5587, + "step": 93520 + }, + { + "epoch": 0.94, + "learning_rate": 5.172631578947368e-05, + "loss": 0.5602, + "step": 93530 + }, + { + "epoch": 0.94, + "learning_rate": 5.1647368421052626e-05, + "loss": 0.5655, + "step": 93540 + }, + { + "epoch": 0.94, + "learning_rate": 5.1568421052631575e-05, + "loss": 0.5536, + "step": 93550 + }, + { + "epoch": 0.94, + "learning_rate": 5.1489473684210516e-05, + "loss": 0.5767, + "step": 93560 + }, + { + "epoch": 0.94, + "learning_rate": 5.141052631578947e-05, + "loss": 0.5697, + "step": 93570 + }, + { + "epoch": 0.94, + "learning_rate": 5.133157894736841e-05, + "loss": 0.5656, + "step": 93580 + }, + { + "epoch": 0.94, + "learning_rate": 5.125263157894737e-05, + "loss": 0.5627, + "step": 93590 + }, + { + "epoch": 0.94, + "learning_rate": 5.117368421052631e-05, + "loss": 0.5727, + "step": 93600 + }, + { + "epoch": 0.94, + "learning_rate": 5.109473684210526e-05, + "loss": 0.5581, + "step": 93610 + }, + { + "epoch": 0.94, + "learning_rate": 5.1015789473684206e-05, + "loss": 0.5502, + "step": 93620 + }, + { + "epoch": 0.94, + "learning_rate": 5.0936842105263154e-05, + "loss": 0.5505, + "step": 93630 + }, + { + "epoch": 0.94, + "learning_rate": 5.08578947368421e-05, + "loss": 0.5572, + "step": 93640 + }, + { + "epoch": 0.94, + "learning_rate": 5.0778947368421044e-05, + "loss": 0.5467, + "step": 93650 + }, + { + "epoch": 0.94, + "learning_rate": 5.07e-05, + "loss": 0.5507, + "step": 93660 + }, + { + "epoch": 0.94, + "learning_rate": 5.062105263157894e-05, + "loss": 0.5508, + "step": 93670 + }, + { + "epoch": 0.94, + "learning_rate": 5.054210526315789e-05, + "loss": 0.5531, + "step": 93680 + }, + { + "epoch": 0.94, + "learning_rate": 5.046315789473684e-05, + "loss": 0.5446, + "step": 93690 + }, + { + "epoch": 0.94, + "learning_rate": 5.0384210526315786e-05, + "loss": 0.5452, + "step": 93700 + }, + { + "epoch": 0.94, + "learning_rate": 5.0305263157894734e-05, + "loss": 0.5435, + "step": 93710 + }, + { + "epoch": 0.94, + "learning_rate": 5.0226315789473675e-05, + "loss": 0.5439, + "step": 93720 + }, + { + "epoch": 0.94, + "learning_rate": 5.014736842105263e-05, + "loss": 0.5391, + "step": 93730 + }, + { + "epoch": 0.94, + "learning_rate": 5.006842105263157e-05, + "loss": 0.5426, + "step": 93740 + }, + { + "epoch": 0.94, + "learning_rate": 4.998947368421052e-05, + "loss": 0.5356, + "step": 93750 + }, + { + "epoch": 0.94, + "learning_rate": 4.991052631578947e-05, + "loss": 0.5665, + "step": 93760 + }, + { + "epoch": 0.94, + "learning_rate": 4.983157894736842e-05, + "loss": 0.5653, + "step": 93770 + }, + { + "epoch": 0.94, + "learning_rate": 4.9752631578947365e-05, + "loss": 0.5651, + "step": 93780 + }, + { + "epoch": 0.94, + "learning_rate": 4.967368421052631e-05, + "loss": 0.5734, + "step": 93790 + }, + { + "epoch": 0.94, + "learning_rate": 4.959473684210526e-05, + "loss": 0.5607, + "step": 93800 + }, + { + "epoch": 0.94, + "learning_rate": 4.95157894736842e-05, + "loss": 0.5729, + "step": 93810 + }, + { + "epoch": 0.94, + "learning_rate": 4.943684210526315e-05, + "loss": 0.5705, + "step": 93820 + }, + { + "epoch": 0.94, + "learning_rate": 4.93578947368421e-05, + "loss": 0.564, + "step": 93830 + }, + { + "epoch": 0.94, + "learning_rate": 4.927894736842105e-05, + "loss": 0.5598, + "step": 93840 + }, + { + "epoch": 0.94, + "learning_rate": 4.9199999999999997e-05, + "loss": 0.551, + "step": 93850 + }, + { + "epoch": 0.94, + "learning_rate": 4.9121052631578945e-05, + "loss": 0.5414, + "step": 93860 + }, + { + "epoch": 0.94, + "learning_rate": 4.904210526315789e-05, + "loss": 0.5508, + "step": 93870 + }, + { + "epoch": 0.94, + "learning_rate": 4.8963157894736835e-05, + "loss": 0.5563, + "step": 93880 + }, + { + "epoch": 0.94, + "learning_rate": 4.888421052631578e-05, + "loss": 0.5493, + "step": 93890 + }, + { + "epoch": 0.94, + "learning_rate": 4.880526315789473e-05, + "loss": 0.5347, + "step": 93900 + }, + { + "epoch": 0.94, + "learning_rate": 4.872631578947368e-05, + "loss": 0.5435, + "step": 93910 + }, + { + "epoch": 0.94, + "learning_rate": 4.864736842105263e-05, + "loss": 0.5397, + "step": 93920 + }, + { + "epoch": 0.94, + "learning_rate": 4.8568421052631576e-05, + "loss": 0.5361, + "step": 93930 + }, + { + "epoch": 0.94, + "learning_rate": 4.8489473684210524e-05, + "loss": 0.533, + "step": 93940 + }, + { + "epoch": 0.94, + "learning_rate": 4.8410526315789466e-05, + "loss": 0.5266, + "step": 93950 + }, + { + "epoch": 0.94, + "learning_rate": 4.8331578947368414e-05, + "loss": 0.5381, + "step": 93960 + }, + { + "epoch": 0.94, + "learning_rate": 4.825263157894736e-05, + "loss": 0.5453, + "step": 93970 + }, + { + "epoch": 0.94, + "learning_rate": 4.817368421052631e-05, + "loss": 0.5353, + "step": 93980 + }, + { + "epoch": 0.94, + "learning_rate": 4.809473684210526e-05, + "loss": 0.5374, + "step": 93990 + }, + { + "epoch": 0.94, + "learning_rate": 4.801578947368421e-05, + "loss": 0.5461, + "step": 94000 + }, + { + "epoch": 0.94, + "learning_rate": 4.7936842105263156e-05, + "loss": 0.5481, + "step": 94010 + }, + { + "epoch": 0.94, + "learning_rate": 4.78578947368421e-05, + "loss": 0.5603, + "step": 94020 + }, + { + "epoch": 0.94, + "learning_rate": 4.7778947368421046e-05, + "loss": 0.5625, + "step": 94030 + }, + { + "epoch": 0.94, + "learning_rate": 4.7699999999999994e-05, + "loss": 0.568, + "step": 94040 + }, + { + "epoch": 0.94, + "learning_rate": 4.762105263157894e-05, + "loss": 0.5599, + "step": 94050 + }, + { + "epoch": 0.94, + "learning_rate": 4.754210526315789e-05, + "loss": 0.5612, + "step": 94060 + }, + { + "epoch": 0.94, + "learning_rate": 4.746315789473684e-05, + "loss": 0.5454, + "step": 94070 + }, + { + "epoch": 0.94, + "learning_rate": 4.738421052631579e-05, + "loss": 0.5487, + "step": 94080 + }, + { + "epoch": 0.94, + "learning_rate": 4.7305263157894735e-05, + "loss": 0.5448, + "step": 94090 + }, + { + "epoch": 0.94, + "learning_rate": 4.722631578947368e-05, + "loss": 0.5491, + "step": 94100 + }, + { + "epoch": 0.94, + "learning_rate": 4.715526315789473e-05, + "loss": 0.5526, + "step": 94110 + }, + { + "epoch": 0.94, + "learning_rate": 4.707631578947369e-05, + "loss": 0.5447, + "step": 94120 + }, + { + "epoch": 0.94, + "learning_rate": 4.699736842105263e-05, + "loss": 0.5488, + "step": 94130 + }, + { + "epoch": 0.94, + "learning_rate": 4.691842105263157e-05, + "loss": 0.5453, + "step": 94140 + }, + { + "epoch": 0.94, + "learning_rate": 4.6839473684210525e-05, + "loss": 0.5392, + "step": 94150 + }, + { + "epoch": 0.94, + "learning_rate": 4.6760526315789467e-05, + "loss": 0.5412, + "step": 94160 + }, + { + "epoch": 0.94, + "learning_rate": 4.668157894736842e-05, + "loss": 0.5335, + "step": 94170 + }, + { + "epoch": 0.94, + "learning_rate": 4.660263157894736e-05, + "loss": 0.5324, + "step": 94180 + }, + { + "epoch": 0.94, + "learning_rate": 4.652368421052632e-05, + "loss": 0.5441, + "step": 94190 + }, + { + "epoch": 0.94, + "learning_rate": 4.644473684210526e-05, + "loss": 0.5295, + "step": 94200 + }, + { + "epoch": 0.94, + "learning_rate": 4.63657894736842e-05, + "loss": 0.5475, + "step": 94210 + }, + { + "epoch": 0.94, + "learning_rate": 4.6286842105263156e-05, + "loss": 0.5501, + "step": 94220 + }, + { + "epoch": 0.94, + "learning_rate": 4.62078947368421e-05, + "loss": 0.567, + "step": 94230 + }, + { + "epoch": 0.94, + "learning_rate": 4.612894736842105e-05, + "loss": 0.5567, + "step": 94240 + }, + { + "epoch": 0.94, + "learning_rate": 4.6049999999999994e-05, + "loss": 0.5674, + "step": 94250 + }, + { + "epoch": 0.94, + "learning_rate": 4.597105263157895e-05, + "loss": 0.5685, + "step": 94260 + }, + { + "epoch": 0.94, + "learning_rate": 4.589210526315789e-05, + "loss": 0.5618, + "step": 94270 + }, + { + "epoch": 0.94, + "learning_rate": 4.581315789473683e-05, + "loss": 0.5669, + "step": 94280 + }, + { + "epoch": 0.94, + "learning_rate": 4.573421052631579e-05, + "loss": 0.5627, + "step": 94290 + }, + { + "epoch": 0.94, + "learning_rate": 4.565526315789473e-05, + "loss": 0.5577, + "step": 94300 + }, + { + "epoch": 0.94, + "learning_rate": 4.5576315789473684e-05, + "loss": 0.5517, + "step": 94310 + }, + { + "epoch": 0.94, + "learning_rate": 4.5497368421052626e-05, + "loss": 0.5455, + "step": 94320 + }, + { + "epoch": 0.94, + "learning_rate": 4.541842105263158e-05, + "loss": 0.5478, + "step": 94330 + }, + { + "epoch": 0.94, + "learning_rate": 4.533947368421052e-05, + "loss": 0.5443, + "step": 94340 + }, + { + "epoch": 0.94, + "learning_rate": 4.5260526315789464e-05, + "loss": 0.5539, + "step": 94350 + }, + { + "epoch": 0.94, + "learning_rate": 4.518157894736842e-05, + "loss": 0.5513, + "step": 94360 + }, + { + "epoch": 0.94, + "learning_rate": 4.510263157894736e-05, + "loss": 0.5428, + "step": 94370 + }, + { + "epoch": 0.94, + "learning_rate": 4.5023684210526316e-05, + "loss": 0.5497, + "step": 94380 + }, + { + "epoch": 0.94, + "learning_rate": 4.494473684210526e-05, + "loss": 0.5517, + "step": 94390 + }, + { + "epoch": 0.94, + "learning_rate": 4.486578947368421e-05, + "loss": 0.5433, + "step": 94400 + }, + { + "epoch": 0.94, + "learning_rate": 4.4786842105263154e-05, + "loss": 0.5405, + "step": 94410 + }, + { + "epoch": 0.94, + "learning_rate": 4.4707894736842095e-05, + "loss": 0.5429, + "step": 94420 + }, + { + "epoch": 0.94, + "learning_rate": 4.462894736842105e-05, + "loss": 0.5556, + "step": 94430 + }, + { + "epoch": 0.94, + "learning_rate": 4.454999999999999e-05, + "loss": 0.5511, + "step": 94440 + }, + { + "epoch": 0.94, + "learning_rate": 4.447105263157895e-05, + "loss": 0.5604, + "step": 94450 + }, + { + "epoch": 0.94, + "learning_rate": 4.439210526315789e-05, + "loss": 0.5699, + "step": 94460 + }, + { + "epoch": 0.94, + "learning_rate": 4.4313157894736844e-05, + "loss": 0.5701, + "step": 94470 + }, + { + "epoch": 0.94, + "learning_rate": 4.4234210526315785e-05, + "loss": 0.5673, + "step": 94480 + }, + { + "epoch": 0.94, + "learning_rate": 4.415526315789473e-05, + "loss": 0.5775, + "step": 94490 + }, + { + "epoch": 0.94, + "learning_rate": 4.407631578947368e-05, + "loss": 0.5747, + "step": 94500 + }, + { + "epoch": 0.95, + "learning_rate": 4.399736842105262e-05, + "loss": 0.5858, + "step": 94510 + }, + { + "epoch": 0.95, + "learning_rate": 4.391842105263158e-05, + "loss": 0.5853, + "step": 94520 + }, + { + "epoch": 0.95, + "learning_rate": 4.383947368421052e-05, + "loss": 0.5728, + "step": 94530 + }, + { + "epoch": 0.95, + "learning_rate": 4.3760526315789475e-05, + "loss": 0.5671, + "step": 94540 + }, + { + "epoch": 0.95, + "learning_rate": 4.3681578947368416e-05, + "loss": 0.5611, + "step": 94550 + }, + { + "epoch": 0.95, + "learning_rate": 4.360263157894736e-05, + "loss": 0.5633, + "step": 94560 + }, + { + "epoch": 0.95, + "learning_rate": 4.352368421052631e-05, + "loss": 0.5577, + "step": 94570 + }, + { + "epoch": 0.95, + "learning_rate": 4.3444736842105255e-05, + "loss": 0.5653, + "step": 94580 + }, + { + "epoch": 0.95, + "learning_rate": 4.336578947368421e-05, + "loss": 0.5482, + "step": 94590 + }, + { + "epoch": 0.95, + "learning_rate": 4.328684210526315e-05, + "loss": 0.5551, + "step": 94600 + }, + { + "epoch": 0.95, + "learning_rate": 4.3207894736842106e-05, + "loss": 0.5538, + "step": 94610 + }, + { + "epoch": 0.95, + "learning_rate": 4.312894736842105e-05, + "loss": 0.5382, + "step": 94620 + }, + { + "epoch": 0.95, + "learning_rate": 4.3049999999999996e-05, + "loss": 0.5542, + "step": 94630 + }, + { + "epoch": 0.95, + "learning_rate": 4.2971052631578944e-05, + "loss": 0.5521, + "step": 94640 + }, + { + "epoch": 0.95, + "learning_rate": 4.2892105263157886e-05, + "loss": 0.5469, + "step": 94650 + }, + { + "epoch": 0.95, + "learning_rate": 4.281315789473684e-05, + "loss": 0.5476, + "step": 94660 + }, + { + "epoch": 0.95, + "learning_rate": 4.273421052631578e-05, + "loss": 0.5429, + "step": 94670 + }, + { + "epoch": 0.95, + "learning_rate": 4.265526315789474e-05, + "loss": 0.5527, + "step": 94680 + }, + { + "epoch": 0.95, + "learning_rate": 4.257631578947368e-05, + "loss": 0.5632, + "step": 94690 + }, + { + "epoch": 0.95, + "learning_rate": 4.249736842105263e-05, + "loss": 0.5661, + "step": 94700 + }, + { + "epoch": 0.95, + "learning_rate": 4.2418421052631576e-05, + "loss": 0.5854, + "step": 94710 + }, + { + "epoch": 0.95, + "learning_rate": 4.233947368421052e-05, + "loss": 0.5721, + "step": 94720 + }, + { + "epoch": 0.95, + "learning_rate": 4.226052631578947e-05, + "loss": 0.5783, + "step": 94730 + }, + { + "epoch": 0.95, + "learning_rate": 4.2181578947368414e-05, + "loss": 0.5672, + "step": 94740 + }, + { + "epoch": 0.95, + "learning_rate": 4.210263157894737e-05, + "loss": 0.572, + "step": 94750 + }, + { + "epoch": 0.95, + "learning_rate": 4.202368421052631e-05, + "loss": 0.5643, + "step": 94760 + }, + { + "epoch": 0.95, + "learning_rate": 4.194473684210526e-05, + "loss": 0.5759, + "step": 94770 + }, + { + "epoch": 0.95, + "learning_rate": 4.186578947368421e-05, + "loss": 0.5706, + "step": 94780 + }, + { + "epoch": 0.95, + "learning_rate": 4.178684210526315e-05, + "loss": 0.5665, + "step": 94790 + }, + { + "epoch": 0.95, + "learning_rate": 4.1707894736842104e-05, + "loss": 0.5499, + "step": 94800 + }, + { + "epoch": 0.95, + "learning_rate": 4.1628947368421045e-05, + "loss": 0.548, + "step": 94810 + }, + { + "epoch": 0.95, + "learning_rate": 4.155e-05, + "loss": 0.5503, + "step": 94820 + }, + { + "epoch": 0.95, + "learning_rate": 4.147105263157894e-05, + "loss": 0.558, + "step": 94830 + }, + { + "epoch": 0.95, + "learning_rate": 4.139210526315789e-05, + "loss": 0.5583, + "step": 94840 + }, + { + "epoch": 0.95, + "learning_rate": 4.131315789473684e-05, + "loss": 0.548, + "step": 94850 + }, + { + "epoch": 0.95, + "learning_rate": 4.123421052631579e-05, + "loss": 0.5449, + "step": 94860 + }, + { + "epoch": 0.95, + "learning_rate": 4.1155263157894735e-05, + "loss": 0.5324, + "step": 94870 + }, + { + "epoch": 0.95, + "learning_rate": 4.1076315789473677e-05, + "loss": 0.5373, + "step": 94880 + }, + { + "epoch": 0.95, + "learning_rate": 4.099736842105263e-05, + "loss": 0.5574, + "step": 94890 + }, + { + "epoch": 0.95, + "learning_rate": 4.091842105263157e-05, + "loss": 0.5446, + "step": 94900 + }, + { + "epoch": 0.95, + "learning_rate": 4.083947368421052e-05, + "loss": 0.5565, + "step": 94910 + }, + { + "epoch": 0.95, + "learning_rate": 4.076052631578947e-05, + "loss": 0.5674, + "step": 94920 + }, + { + "epoch": 0.95, + "learning_rate": 4.068157894736842e-05, + "loss": 0.5634, + "step": 94930 + }, + { + "epoch": 0.95, + "learning_rate": 4.0602631578947366e-05, + "loss": 0.5778, + "step": 94940 + }, + { + "epoch": 0.95, + "learning_rate": 4.052368421052631e-05, + "loss": 0.5761, + "step": 94950 + }, + { + "epoch": 0.95, + "learning_rate": 4.044473684210526e-05, + "loss": 0.571, + "step": 94960 + }, + { + "epoch": 0.95, + "learning_rate": 4.0365789473684205e-05, + "loss": 0.5681, + "step": 94970 + }, + { + "epoch": 0.95, + "learning_rate": 4.028684210526315e-05, + "loss": 0.5841, + "step": 94980 + }, + { + "epoch": 0.95, + "learning_rate": 4.02078947368421e-05, + "loss": 0.5761, + "step": 94990 + }, + { + "epoch": 0.95, + "learning_rate": 4.012894736842105e-05, + "loss": 0.5829, + "step": 95000 + }, + { + "epoch": 0.95, + "eval_accuracy": 0.8839367508449035, + "eval_loss": 0.52685546875, + "eval_runtime": 98.3189, + "eval_samples_per_second": 813.679, + "eval_steps_per_second": 1.597, + "step": 95000 + }, + { + "epoch": 0.95, + "learning_rate": 4.005e-05, + "loss": 0.5568, + "step": 95010 + }, + { + "epoch": 0.95, + "learning_rate": 3.997105263157894e-05, + "loss": 0.5498, + "step": 95020 + }, + { + "epoch": 0.95, + "learning_rate": 3.9892105263157894e-05, + "loss": 0.5406, + "step": 95030 + }, + { + "epoch": 0.95, + "learning_rate": 3.9813157894736836e-05, + "loss": 0.5532, + "step": 95040 + }, + { + "epoch": 0.95, + "learning_rate": 3.9734210526315784e-05, + "loss": 0.5551, + "step": 95050 + }, + { + "epoch": 0.95, + "learning_rate": 3.965526315789473e-05, + "loss": 0.5595, + "step": 95060 + }, + { + "epoch": 0.95, + "learning_rate": 3.957631578947368e-05, + "loss": 0.5469, + "step": 95070 + }, + { + "epoch": 0.95, + "learning_rate": 3.949736842105263e-05, + "loss": 0.5454, + "step": 95080 + }, + { + "epoch": 0.95, + "learning_rate": 3.941842105263158e-05, + "loss": 0.5446, + "step": 95090 + }, + { + "epoch": 0.95, + "learning_rate": 3.9339473684210526e-05, + "loss": 0.535, + "step": 95100 + }, + { + "epoch": 0.95, + "learning_rate": 3.9268421052631574e-05, + "loss": 0.536, + "step": 95110 + }, + { + "epoch": 0.95, + "learning_rate": 3.918947368421053e-05, + "loss": 0.5379, + "step": 95120 + }, + { + "epoch": 0.95, + "learning_rate": 3.911052631578947e-05, + "loss": 0.5307, + "step": 95130 + }, + { + "epoch": 0.95, + "learning_rate": 3.903157894736841e-05, + "loss": 0.5353, + "step": 95140 + }, + { + "epoch": 0.95, + "learning_rate": 3.895263157894737e-05, + "loss": 0.5446, + "step": 95150 + }, + { + "epoch": 0.95, + "learning_rate": 3.887368421052631e-05, + "loss": 0.5689, + "step": 95160 + }, + { + "epoch": 0.95, + "learning_rate": 3.8794736842105264e-05, + "loss": 0.568, + "step": 95170 + }, + { + "epoch": 0.95, + "learning_rate": 3.8715789473684205e-05, + "loss": 0.5719, + "step": 95180 + }, + { + "epoch": 0.95, + "learning_rate": 3.863684210526316e-05, + "loss": 0.5674, + "step": 95190 + }, + { + "epoch": 0.95, + "learning_rate": 3.85578947368421e-05, + "loss": 0.5561, + "step": 95200 + }, + { + "epoch": 0.95, + "learning_rate": 3.847894736842104e-05, + "loss": 0.5712, + "step": 95210 + }, + { + "epoch": 0.95, + "learning_rate": 3.84e-05, + "loss": 0.5603, + "step": 95220 + }, + { + "epoch": 0.95, + "learning_rate": 3.832105263157894e-05, + "loss": 0.5641, + "step": 95230 + }, + { + "epoch": 0.95, + "learning_rate": 3.8242105263157895e-05, + "loss": 0.5608, + "step": 95240 + }, + { + "epoch": 0.95, + "learning_rate": 3.8163157894736836e-05, + "loss": 0.5598, + "step": 95250 + }, + { + "epoch": 0.95, + "learning_rate": 3.808421052631579e-05, + "loss": 0.5671, + "step": 95260 + }, + { + "epoch": 0.95, + "learning_rate": 3.800526315789473e-05, + "loss": 0.5663, + "step": 95270 + }, + { + "epoch": 0.95, + "learning_rate": 3.7926315789473675e-05, + "loss": 0.5518, + "step": 95280 + }, + { + "epoch": 0.95, + "learning_rate": 3.784736842105263e-05, + "loss": 0.5732, + "step": 95290 + }, + { + "epoch": 0.95, + "learning_rate": 3.776842105263157e-05, + "loss": 0.5565, + "step": 95300 + }, + { + "epoch": 0.95, + "learning_rate": 3.7689473684210526e-05, + "loss": 0.5531, + "step": 95310 + }, + { + "epoch": 0.95, + "learning_rate": 3.761052631578947e-05, + "loss": 0.5512, + "step": 95320 + }, + { + "epoch": 0.95, + "learning_rate": 3.753157894736842e-05, + "loss": 0.5591, + "step": 95330 + }, + { + "epoch": 0.95, + "learning_rate": 3.7452631578947364e-05, + "loss": 0.553, + "step": 95340 + }, + { + "epoch": 0.95, + "learning_rate": 3.737368421052631e-05, + "loss": 0.5402, + "step": 95350 + }, + { + "epoch": 0.95, + "learning_rate": 3.729473684210526e-05, + "loss": 0.5408, + "step": 95360 + }, + { + "epoch": 0.95, + "learning_rate": 3.721578947368421e-05, + "loss": 0.5403, + "step": 95370 + }, + { + "epoch": 0.95, + "learning_rate": 3.713684210526316e-05, + "loss": 0.5423, + "step": 95380 + }, + { + "epoch": 0.95, + "learning_rate": 3.70578947368421e-05, + "loss": 0.5352, + "step": 95390 + }, + { + "epoch": 0.95, + "learning_rate": 3.697894736842105e-05, + "loss": 0.5481, + "step": 95400 + }, + { + "epoch": 0.95, + "learning_rate": 3.6899999999999996e-05, + "loss": 0.5533, + "step": 95410 + }, + { + "epoch": 0.95, + "learning_rate": 3.6821052631578944e-05, + "loss": 0.548, + "step": 95420 + }, + { + "epoch": 0.95, + "learning_rate": 3.674210526315789e-05, + "loss": 0.5597, + "step": 95430 + }, + { + "epoch": 0.95, + "learning_rate": 3.666315789473684e-05, + "loss": 0.5572, + "step": 95440 + }, + { + "epoch": 0.95, + "learning_rate": 3.658421052631579e-05, + "loss": 0.5442, + "step": 95450 + }, + { + "epoch": 0.95, + "learning_rate": 3.650526315789473e-05, + "loss": 0.554, + "step": 95460 + }, + { + "epoch": 0.95, + "learning_rate": 3.642631578947368e-05, + "loss": 0.5457, + "step": 95470 + }, + { + "epoch": 0.95, + "learning_rate": 3.634736842105263e-05, + "loss": 0.5552, + "step": 95480 + }, + { + "epoch": 0.95, + "learning_rate": 3.6268421052631575e-05, + "loss": 0.5455, + "step": 95490 + }, + { + "epoch": 0.95, + "learning_rate": 3.6189473684210524e-05, + "loss": 0.5406, + "step": 95500 + }, + { + "epoch": 0.96, + "learning_rate": 3.611052631578947e-05, + "loss": 0.5361, + "step": 95510 + }, + { + "epoch": 0.96, + "learning_rate": 3.603157894736842e-05, + "loss": 0.5373, + "step": 95520 + }, + { + "epoch": 0.96, + "learning_rate": 3.595263157894736e-05, + "loss": 0.5197, + "step": 95530 + }, + { + "epoch": 0.96, + "learning_rate": 3.587368421052631e-05, + "loss": 0.5248, + "step": 95540 + }, + { + "epoch": 0.96, + "learning_rate": 3.579473684210526e-05, + "loss": 0.5302, + "step": 95550 + }, + { + "epoch": 0.96, + "learning_rate": 3.571578947368421e-05, + "loss": 0.5375, + "step": 95560 + }, + { + "epoch": 0.96, + "learning_rate": 3.5636842105263155e-05, + "loss": 0.5219, + "step": 95570 + }, + { + "epoch": 0.96, + "learning_rate": 3.55578947368421e-05, + "loss": 0.5273, + "step": 95580 + }, + { + "epoch": 0.96, + "learning_rate": 3.547894736842105e-05, + "loss": 0.5231, + "step": 95590 + }, + { + "epoch": 0.96, + "learning_rate": 3.539999999999999e-05, + "loss": 0.5261, + "step": 95600 + }, + { + "epoch": 0.96, + "learning_rate": 3.532105263157894e-05, + "loss": 0.5414, + "step": 95610 + }, + { + "epoch": 0.96, + "learning_rate": 3.524210526315789e-05, + "loss": 0.539, + "step": 95620 + }, + { + "epoch": 0.96, + "learning_rate": 3.516315789473684e-05, + "loss": 0.5431, + "step": 95630 + }, + { + "epoch": 0.96, + "learning_rate": 3.5084210526315786e-05, + "loss": 0.5294, + "step": 95640 + }, + { + "epoch": 0.96, + "learning_rate": 3.5005263157894735e-05, + "loss": 0.5412, + "step": 95650 + }, + { + "epoch": 0.96, + "learning_rate": 3.492631578947368e-05, + "loss": 0.5516, + "step": 95660 + }, + { + "epoch": 0.96, + "learning_rate": 3.4847368421052624e-05, + "loss": 0.5552, + "step": 95670 + }, + { + "epoch": 0.96, + "learning_rate": 3.476842105263157e-05, + "loss": 0.55, + "step": 95680 + }, + { + "epoch": 0.96, + "learning_rate": 3.468947368421052e-05, + "loss": 0.5607, + "step": 95690 + }, + { + "epoch": 0.96, + "learning_rate": 3.461052631578947e-05, + "loss": 0.5547, + "step": 95700 + }, + { + "epoch": 0.96, + "learning_rate": 3.453157894736842e-05, + "loss": 0.5573, + "step": 95710 + }, + { + "epoch": 0.96, + "learning_rate": 3.4452631578947366e-05, + "loss": 0.5617, + "step": 95720 + }, + { + "epoch": 0.96, + "learning_rate": 3.4373684210526314e-05, + "loss": 0.565, + "step": 95730 + }, + { + "epoch": 0.96, + "learning_rate": 3.4294736842105256e-05, + "loss": 0.5638, + "step": 95740 + }, + { + "epoch": 0.96, + "learning_rate": 3.4215789473684204e-05, + "loss": 0.5517, + "step": 95750 + }, + { + "epoch": 0.96, + "learning_rate": 3.413684210526315e-05, + "loss": 0.5676, + "step": 95760 + }, + { + "epoch": 0.96, + "learning_rate": 3.40578947368421e-05, + "loss": 0.5414, + "step": 95770 + }, + { + "epoch": 0.96, + "learning_rate": 3.397894736842105e-05, + "loss": 0.5531, + "step": 95780 + }, + { + "epoch": 0.96, + "learning_rate": 3.39e-05, + "loss": 0.5515, + "step": 95790 + }, + { + "epoch": 0.96, + "learning_rate": 3.3821052631578946e-05, + "loss": 0.5547, + "step": 95800 + }, + { + "epoch": 0.96, + "learning_rate": 3.374210526315789e-05, + "loss": 0.5481, + "step": 95810 + }, + { + "epoch": 0.96, + "learning_rate": 3.3663157894736835e-05, + "loss": 0.5464, + "step": 95820 + }, + { + "epoch": 0.96, + "learning_rate": 3.3584210526315784e-05, + "loss": 0.5296, + "step": 95830 + }, + { + "epoch": 0.96, + "learning_rate": 3.350526315789473e-05, + "loss": 0.5566, + "step": 95840 + }, + { + "epoch": 0.96, + "learning_rate": 3.342631578947368e-05, + "loss": 0.5197, + "step": 95850 + }, + { + "epoch": 0.96, + "learning_rate": 3.334736842105263e-05, + "loss": 0.5252, + "step": 95860 + }, + { + "epoch": 0.96, + "learning_rate": 3.326842105263158e-05, + "loss": 0.5438, + "step": 95870 + }, + { + "epoch": 0.96, + "learning_rate": 3.318947368421052e-05, + "loss": 0.5394, + "step": 95880 + }, + { + "epoch": 0.96, + "learning_rate": 3.311052631578947e-05, + "loss": 0.5398, + "step": 95890 + }, + { + "epoch": 0.96, + "learning_rate": 3.3031578947368415e-05, + "loss": 0.5438, + "step": 95900 + }, + { + "epoch": 0.96, + "learning_rate": 3.295263157894736e-05, + "loss": 0.5639, + "step": 95910 + }, + { + "epoch": 0.96, + "learning_rate": 3.287368421052631e-05, + "loss": 0.5493, + "step": 95920 + }, + { + "epoch": 0.96, + "learning_rate": 3.279473684210526e-05, + "loss": 0.5642, + "step": 95930 + }, + { + "epoch": 0.96, + "learning_rate": 3.271578947368421e-05, + "loss": 0.5573, + "step": 95940 + }, + { + "epoch": 0.96, + "learning_rate": 3.2636842105263157e-05, + "loss": 0.5567, + "step": 95950 + }, + { + "epoch": 0.96, + "learning_rate": 3.25578947368421e-05, + "loss": 0.5554, + "step": 95960 + }, + { + "epoch": 0.96, + "learning_rate": 3.2478947368421046e-05, + "loss": 0.5513, + "step": 95970 + }, + { + "epoch": 0.96, + "learning_rate": 3.2399999999999995e-05, + "loss": 0.5468, + "step": 95980 + }, + { + "epoch": 0.96, + "learning_rate": 3.232105263157894e-05, + "loss": 0.5452, + "step": 95990 + }, + { + "epoch": 0.96, + "learning_rate": 3.224210526315789e-05, + "loss": 0.5277, + "step": 96000 + }, + { + "epoch": 0.96, + "learning_rate": 3.216315789473684e-05, + "loss": 0.5449, + "step": 96010 + }, + { + "epoch": 0.96, + "learning_rate": 3.208421052631579e-05, + "loss": 0.5374, + "step": 96020 + }, + { + "epoch": 0.96, + "learning_rate": 3.200526315789473e-05, + "loss": 0.5532, + "step": 96030 + }, + { + "epoch": 0.96, + "learning_rate": 3.192631578947368e-05, + "loss": 0.5255, + "step": 96040 + }, + { + "epoch": 0.96, + "learning_rate": 3.1847368421052626e-05, + "loss": 0.5336, + "step": 96050 + }, + { + "epoch": 0.96, + "learning_rate": 3.1768421052631574e-05, + "loss": 0.5327, + "step": 96060 + }, + { + "epoch": 0.96, + "learning_rate": 3.168947368421052e-05, + "loss": 0.53, + "step": 96070 + }, + { + "epoch": 0.96, + "learning_rate": 3.161052631578947e-05, + "loss": 0.5198, + "step": 96080 + }, + { + "epoch": 0.96, + "learning_rate": 3.153157894736842e-05, + "loss": 0.5226, + "step": 96090 + }, + { + "epoch": 0.96, + "learning_rate": 3.145263157894736e-05, + "loss": 0.5224, + "step": 96100 + }, + { + "epoch": 0.96, + "learning_rate": 3.138157894736842e-05, + "loss": 0.5325, + "step": 96110 + }, + { + "epoch": 0.96, + "learning_rate": 3.1302631578947364e-05, + "loss": 0.5368, + "step": 96120 + }, + { + "epoch": 0.96, + "learning_rate": 3.122368421052631e-05, + "loss": 0.5431, + "step": 96130 + }, + { + "epoch": 0.96, + "learning_rate": 3.114473684210526e-05, + "loss": 0.5476, + "step": 96140 + }, + { + "epoch": 0.96, + "learning_rate": 3.106578947368421e-05, + "loss": 0.5619, + "step": 96150 + }, + { + "epoch": 0.96, + "learning_rate": 3.098684210526316e-05, + "loss": 0.5504, + "step": 96160 + }, + { + "epoch": 0.96, + "learning_rate": 3.0907894736842105e-05, + "loss": 0.5671, + "step": 96170 + }, + { + "epoch": 0.96, + "learning_rate": 3.0828947368421054e-05, + "loss": 0.5477, + "step": 96180 + }, + { + "epoch": 0.96, + "learning_rate": 3.0749999999999995e-05, + "loss": 0.5622, + "step": 96190 + }, + { + "epoch": 0.96, + "learning_rate": 3.0671052631578944e-05, + "loss": 0.5505, + "step": 96200 + }, + { + "epoch": 0.96, + "learning_rate": 3.059210526315789e-05, + "loss": 0.5486, + "step": 96210 + }, + { + "epoch": 0.96, + "learning_rate": 3.051315789473684e-05, + "loss": 0.5309, + "step": 96220 + }, + { + "epoch": 0.96, + "learning_rate": 3.043421052631579e-05, + "loss": 0.5347, + "step": 96230 + }, + { + "epoch": 0.96, + "learning_rate": 3.0355263157894737e-05, + "loss": 0.5366, + "step": 96240 + }, + { + "epoch": 0.96, + "learning_rate": 3.0276315789473682e-05, + "loss": 0.5322, + "step": 96250 + }, + { + "epoch": 0.96, + "learning_rate": 3.0197368421052627e-05, + "loss": 0.5508, + "step": 96260 + }, + { + "epoch": 0.96, + "learning_rate": 3.0118421052631575e-05, + "loss": 0.5386, + "step": 96270 + }, + { + "epoch": 0.96, + "learning_rate": 3.0039473684210523e-05, + "loss": 0.5458, + "step": 96280 + }, + { + "epoch": 0.96, + "learning_rate": 2.996052631578947e-05, + "loss": 0.5432, + "step": 96290 + }, + { + "epoch": 0.96, + "learning_rate": 2.988157894736842e-05, + "loss": 0.5317, + "step": 96300 + }, + { + "epoch": 0.96, + "learning_rate": 2.9802631578947368e-05, + "loss": 0.5331, + "step": 96310 + }, + { + "epoch": 0.96, + "learning_rate": 2.9723684210526316e-05, + "loss": 0.5273, + "step": 96320 + }, + { + "epoch": 0.96, + "learning_rate": 2.9644736842105258e-05, + "loss": 0.5326, + "step": 96330 + }, + { + "epoch": 0.96, + "learning_rate": 2.9565789473684206e-05, + "loss": 0.5359, + "step": 96340 + }, + { + "epoch": 0.96, + "learning_rate": 2.9486842105263155e-05, + "loss": 0.5455, + "step": 96350 + }, + { + "epoch": 0.96, + "learning_rate": 2.9407894736842103e-05, + "loss": 0.5487, + "step": 96360 + }, + { + "epoch": 0.96, + "learning_rate": 2.932894736842105e-05, + "loss": 0.5657, + "step": 96370 + }, + { + "epoch": 0.96, + "learning_rate": 2.925e-05, + "loss": 0.5638, + "step": 96380 + }, + { + "epoch": 0.96, + "learning_rate": 2.9171052631578948e-05, + "loss": 0.5571, + "step": 96390 + }, + { + "epoch": 0.96, + "learning_rate": 2.909210526315789e-05, + "loss": 0.5652, + "step": 96400 + }, + { + "epoch": 0.96, + "learning_rate": 2.9013157894736838e-05, + "loss": 0.5728, + "step": 96410 + }, + { + "epoch": 0.96, + "learning_rate": 2.8934210526315786e-05, + "loss": 0.5647, + "step": 96420 + }, + { + "epoch": 0.96, + "learning_rate": 2.8855263157894734e-05, + "loss": 0.5567, + "step": 96430 + }, + { + "epoch": 0.96, + "learning_rate": 2.8776315789473683e-05, + "loss": 0.558, + "step": 96440 + }, + { + "epoch": 0.96, + "learning_rate": 2.869736842105263e-05, + "loss": 0.5515, + "step": 96450 + }, + { + "epoch": 0.96, + "learning_rate": 2.861842105263158e-05, + "loss": 0.5398, + "step": 96460 + }, + { + "epoch": 0.96, + "learning_rate": 2.853947368421052e-05, + "loss": 0.5497, + "step": 96470 + }, + { + "epoch": 0.96, + "learning_rate": 2.846052631578947e-05, + "loss": 0.5523, + "step": 96480 + }, + { + "epoch": 0.96, + "learning_rate": 2.8381578947368417e-05, + "loss": 0.557, + "step": 96490 + }, + { + "epoch": 0.96, + "learning_rate": 2.8302631578947366e-05, + "loss": 0.5566, + "step": 96500 + }, + { + "epoch": 0.97, + "learning_rate": 2.8223684210526314e-05, + "loss": 0.5379, + "step": 96510 + }, + { + "epoch": 0.97, + "learning_rate": 2.8144736842105262e-05, + "loss": 0.5407, + "step": 96520 + }, + { + "epoch": 0.97, + "learning_rate": 2.806578947368421e-05, + "loss": 0.5258, + "step": 96530 + }, + { + "epoch": 0.97, + "learning_rate": 2.798684210526316e-05, + "loss": 0.54, + "step": 96540 + }, + { + "epoch": 0.97, + "learning_rate": 2.79078947368421e-05, + "loss": 0.524, + "step": 96550 + }, + { + "epoch": 0.97, + "learning_rate": 2.782894736842105e-05, + "loss": 0.5204, + "step": 96560 + }, + { + "epoch": 0.97, + "learning_rate": 2.7749999999999997e-05, + "loss": 0.5338, + "step": 96570 + }, + { + "epoch": 0.97, + "learning_rate": 2.7671052631578945e-05, + "loss": 0.541, + "step": 96580 + }, + { + "epoch": 0.97, + "learning_rate": 2.7592105263157893e-05, + "loss": 0.5475, + "step": 96590 + }, + { + "epoch": 0.97, + "learning_rate": 2.7513157894736842e-05, + "loss": 0.5553, + "step": 96600 + }, + { + "epoch": 0.97, + "learning_rate": 2.743421052631579e-05, + "loss": 0.565, + "step": 96610 + }, + { + "epoch": 0.97, + "learning_rate": 2.735526315789473e-05, + "loss": 0.5593, + "step": 96620 + }, + { + "epoch": 0.97, + "learning_rate": 2.727631578947368e-05, + "loss": 0.5644, + "step": 96630 + }, + { + "epoch": 0.97, + "learning_rate": 2.7197368421052628e-05, + "loss": 0.571, + "step": 96640 + }, + { + "epoch": 0.97, + "learning_rate": 2.7118421052631577e-05, + "loss": 0.569, + "step": 96650 + }, + { + "epoch": 0.97, + "learning_rate": 2.7039473684210525e-05, + "loss": 0.5711, + "step": 96660 + }, + { + "epoch": 0.97, + "learning_rate": 2.6960526315789473e-05, + "loss": 0.5648, + "step": 96670 + }, + { + "epoch": 0.97, + "learning_rate": 2.688157894736842e-05, + "loss": 0.5571, + "step": 96680 + }, + { + "epoch": 0.97, + "learning_rate": 2.6802631578947363e-05, + "loss": 0.5504, + "step": 96690 + }, + { + "epoch": 0.97, + "learning_rate": 2.672368421052631e-05, + "loss": 0.5558, + "step": 96700 + }, + { + "epoch": 0.97, + "learning_rate": 2.664473684210526e-05, + "loss": 0.5563, + "step": 96710 + }, + { + "epoch": 0.97, + "learning_rate": 2.6565789473684208e-05, + "loss": 0.5604, + "step": 96720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6486842105263156e-05, + "loss": 0.5428, + "step": 96730 + }, + { + "epoch": 0.97, + "learning_rate": 2.6407894736842104e-05, + "loss": 0.5404, + "step": 96740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6328947368421053e-05, + "loss": 0.5365, + "step": 96750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6249999999999998e-05, + "loss": 0.5401, + "step": 96760 + }, + { + "epoch": 0.97, + "learning_rate": 2.6171052631578943e-05, + "loss": 0.5342, + "step": 96770 + }, + { + "epoch": 0.97, + "learning_rate": 2.609210526315789e-05, + "loss": 0.5354, + "step": 96780 + }, + { + "epoch": 0.97, + "learning_rate": 2.601315789473684e-05, + "loss": 0.5306, + "step": 96790 + }, + { + "epoch": 0.97, + "learning_rate": 2.5934210526315788e-05, + "loss": 0.536, + "step": 96800 + }, + { + "epoch": 0.97, + "learning_rate": 2.5855263157894736e-05, + "loss": 0.5567, + "step": 96810 + }, + { + "epoch": 0.97, + "learning_rate": 2.5776315789473684e-05, + "loss": 0.5455, + "step": 96820 + }, + { + "epoch": 0.97, + "learning_rate": 2.569736842105263e-05, + "loss": 0.5507, + "step": 96830 + }, + { + "epoch": 0.97, + "learning_rate": 2.5618421052631574e-05, + "loss": 0.5625, + "step": 96840 + }, + { + "epoch": 0.97, + "learning_rate": 2.5539473684210522e-05, + "loss": 0.5549, + "step": 96850 + }, + { + "epoch": 0.97, + "learning_rate": 2.546052631578947e-05, + "loss": 0.5499, + "step": 96860 + }, + { + "epoch": 0.97, + "learning_rate": 2.538157894736842e-05, + "loss": 0.5542, + "step": 96870 + }, + { + "epoch": 0.97, + "learning_rate": 2.5302631578947367e-05, + "loss": 0.5479, + "step": 96880 + }, + { + "epoch": 0.97, + "learning_rate": 2.5223684210526315e-05, + "loss": 0.5515, + "step": 96890 + }, + { + "epoch": 0.97, + "learning_rate": 2.514473684210526e-05, + "loss": 0.5341, + "step": 96900 + }, + { + "epoch": 0.97, + "learning_rate": 2.506578947368421e-05, + "loss": 0.5378, + "step": 96910 + }, + { + "epoch": 0.97, + "learning_rate": 2.4986842105263154e-05, + "loss": 0.5312, + "step": 96920 + }, + { + "epoch": 0.97, + "learning_rate": 2.4907894736842102e-05, + "loss": 0.5169, + "step": 96930 + }, + { + "epoch": 0.97, + "learning_rate": 2.482894736842105e-05, + "loss": 0.5368, + "step": 96940 + }, + { + "epoch": 0.97, + "learning_rate": 2.475e-05, + "loss": 0.5416, + "step": 96950 + }, + { + "epoch": 0.97, + "learning_rate": 2.4671052631578947e-05, + "loss": 0.5266, + "step": 96960 + }, + { + "epoch": 0.97, + "learning_rate": 2.4592105263157892e-05, + "loss": 0.5371, + "step": 96970 + }, + { + "epoch": 0.97, + "learning_rate": 2.451315789473684e-05, + "loss": 0.547, + "step": 96980 + }, + { + "epoch": 0.97, + "learning_rate": 2.443421052631579e-05, + "loss": 0.5425, + "step": 96990 + }, + { + "epoch": 0.97, + "learning_rate": 2.4355263157894733e-05, + "loss": 0.5337, + "step": 97000 + }, + { + "epoch": 0.97, + "learning_rate": 2.427631578947368e-05, + "loss": 0.5488, + "step": 97010 + }, + { + "epoch": 0.97, + "learning_rate": 2.419736842105263e-05, + "loss": 0.5378, + "step": 97020 + }, + { + "epoch": 0.97, + "learning_rate": 2.4118421052631578e-05, + "loss": 0.5393, + "step": 97030 + }, + { + "epoch": 0.97, + "learning_rate": 2.4039473684210523e-05, + "loss": 0.5527, + "step": 97040 + }, + { + "epoch": 0.97, + "learning_rate": 2.396052631578947e-05, + "loss": 0.5498, + "step": 97050 + }, + { + "epoch": 0.97, + "learning_rate": 2.388157894736842e-05, + "loss": 0.5658, + "step": 97060 + }, + { + "epoch": 0.97, + "learning_rate": 2.3802631578947365e-05, + "loss": 0.561, + "step": 97070 + }, + { + "epoch": 0.97, + "learning_rate": 2.3723684210526313e-05, + "loss": 0.5657, + "step": 97080 + }, + { + "epoch": 0.97, + "learning_rate": 2.364473684210526e-05, + "loss": 0.5672, + "step": 97090 + }, + { + "epoch": 0.97, + "learning_rate": 2.356578947368421e-05, + "loss": 0.5773, + "step": 97100 + }, + { + "epoch": 0.97, + "learning_rate": 2.349473684210526e-05, + "loss": 0.5737, + "step": 97110 + }, + { + "epoch": 0.97, + "learning_rate": 2.341578947368421e-05, + "loss": 0.5672, + "step": 97120 + }, + { + "epoch": 0.97, + "learning_rate": 2.3336842105263154e-05, + "loss": 0.5636, + "step": 97130 + }, + { + "epoch": 0.97, + "learning_rate": 2.3257894736842102e-05, + "loss": 0.5599, + "step": 97140 + }, + { + "epoch": 0.97, + "learning_rate": 2.317894736842105e-05, + "loss": 0.5634, + "step": 97150 + }, + { + "epoch": 0.97, + "learning_rate": 2.31e-05, + "loss": 0.5527, + "step": 97160 + }, + { + "epoch": 0.97, + "learning_rate": 2.3021052631578944e-05, + "loss": 0.5462, + "step": 97170 + }, + { + "epoch": 0.97, + "learning_rate": 2.2942105263157892e-05, + "loss": 0.5676, + "step": 97180 + }, + { + "epoch": 0.97, + "learning_rate": 2.286315789473684e-05, + "loss": 0.5544, + "step": 97190 + }, + { + "epoch": 0.97, + "learning_rate": 2.278421052631579e-05, + "loss": 0.5497, + "step": 97200 + }, + { + "epoch": 0.97, + "learning_rate": 2.2705263157894734e-05, + "loss": 0.5432, + "step": 97210 + }, + { + "epoch": 0.97, + "learning_rate": 2.2626315789473682e-05, + "loss": 0.5352, + "step": 97220 + }, + { + "epoch": 0.97, + "learning_rate": 2.254736842105263e-05, + "loss": 0.5462, + "step": 97230 + }, + { + "epoch": 0.97, + "learning_rate": 2.246842105263158e-05, + "loss": 0.5296, + "step": 97240 + }, + { + "epoch": 0.97, + "learning_rate": 2.2389473684210524e-05, + "loss": 0.5315, + "step": 97250 + }, + { + "epoch": 0.97, + "learning_rate": 2.2310526315789472e-05, + "loss": 0.5399, + "step": 97260 + }, + { + "epoch": 0.97, + "learning_rate": 2.223157894736842e-05, + "loss": 0.542, + "step": 97270 + }, + { + "epoch": 0.97, + "learning_rate": 2.2152631578947365e-05, + "loss": 0.5405, + "step": 97280 + }, + { + "epoch": 0.97, + "learning_rate": 2.2073684210526313e-05, + "loss": 0.5418, + "step": 97290 + }, + { + "epoch": 0.97, + "learning_rate": 2.1994736842105262e-05, + "loss": 0.5631, + "step": 97300 + }, + { + "epoch": 0.97, + "learning_rate": 2.191578947368421e-05, + "loss": 0.5695, + "step": 97310 + }, + { + "epoch": 0.97, + "learning_rate": 2.183684210526316e-05, + "loss": 0.5689, + "step": 97320 + }, + { + "epoch": 0.97, + "learning_rate": 2.1757894736842103e-05, + "loss": 0.5672, + "step": 97330 + }, + { + "epoch": 0.97, + "learning_rate": 2.167894736842105e-05, + "loss": 0.5668, + "step": 97340 + }, + { + "epoch": 0.97, + "learning_rate": 2.1599999999999996e-05, + "loss": 0.5717, + "step": 97350 + }, + { + "epoch": 0.97, + "learning_rate": 2.1521052631578945e-05, + "loss": 0.5578, + "step": 97360 + }, + { + "epoch": 0.97, + "learning_rate": 2.1442105263157893e-05, + "loss": 0.5658, + "step": 97370 + }, + { + "epoch": 0.97, + "learning_rate": 2.136315789473684e-05, + "loss": 0.5531, + "step": 97380 + }, + { + "epoch": 0.97, + "learning_rate": 2.128421052631579e-05, + "loss": 0.5455, + "step": 97390 + }, + { + "epoch": 0.97, + "learning_rate": 2.1205263157894735e-05, + "loss": 0.5528, + "step": 97400 + }, + { + "epoch": 0.97, + "learning_rate": 2.1126315789473683e-05, + "loss": 0.5488, + "step": 97410 + }, + { + "epoch": 0.97, + "learning_rate": 2.1047368421052628e-05, + "loss": 0.5441, + "step": 97420 + }, + { + "epoch": 0.97, + "learning_rate": 2.0968421052631576e-05, + "loss": 0.5538, + "step": 97430 + }, + { + "epoch": 0.97, + "learning_rate": 2.0889473684210524e-05, + "loss": 0.5341, + "step": 97440 + }, + { + "epoch": 0.97, + "learning_rate": 2.0810526315789473e-05, + "loss": 0.5429, + "step": 97450 + }, + { + "epoch": 0.97, + "learning_rate": 2.073157894736842e-05, + "loss": 0.5401, + "step": 97460 + }, + { + "epoch": 0.97, + "learning_rate": 2.065263157894737e-05, + "loss": 0.5352, + "step": 97470 + }, + { + "epoch": 0.97, + "learning_rate": 2.0573684210526314e-05, + "loss": 0.5236, + "step": 97480 + }, + { + "epoch": 0.97, + "learning_rate": 2.049473684210526e-05, + "loss": 0.5251, + "step": 97490 + }, + { + "epoch": 0.97, + "learning_rate": 2.0415789473684207e-05, + "loss": 0.5369, + "step": 97500 + }, + { + "epoch": 0.97, + "eval_accuracy": 0.8848131360716049, + "eval_loss": 0.5224609375, + "eval_runtime": 97.6227, + "eval_samples_per_second": 819.482, + "eval_steps_per_second": 1.608, + "step": 97500 + }, + { + "epoch": 0.98, + "learning_rate": 2.0336842105263156e-05, + "loss": 0.5494, + "step": 97510 + }, + { + "epoch": 0.98, + "learning_rate": 2.0257894736842104e-05, + "loss": 0.5459, + "step": 97520 + }, + { + "epoch": 0.98, + "learning_rate": 2.0178947368421052e-05, + "loss": 0.5491, + "step": 97530 + }, + { + "epoch": 0.98, + "learning_rate": 2.01e-05, + "loss": 0.5605, + "step": 97540 + }, + { + "epoch": 0.98, + "learning_rate": 2.002105263157895e-05, + "loss": 0.5561, + "step": 97550 + }, + { + "epoch": 0.98, + "learning_rate": 1.994210526315789e-05, + "loss": 0.5638, + "step": 97560 + }, + { + "epoch": 0.98, + "learning_rate": 1.986315789473684e-05, + "loss": 0.5807, + "step": 97570 + }, + { + "epoch": 0.98, + "learning_rate": 1.9784210526315787e-05, + "loss": 0.5669, + "step": 97580 + }, + { + "epoch": 0.98, + "learning_rate": 1.9705263157894735e-05, + "loss": 0.568, + "step": 97590 + }, + { + "epoch": 0.98, + "learning_rate": 1.9626315789473684e-05, + "loss": 0.5584, + "step": 97600 + }, + { + "epoch": 0.98, + "learning_rate": 1.9547368421052632e-05, + "loss": 0.5545, + "step": 97610 + }, + { + "epoch": 0.98, + "learning_rate": 1.946842105263158e-05, + "loss": 0.5602, + "step": 97620 + }, + { + "epoch": 0.98, + "learning_rate": 1.9389473684210522e-05, + "loss": 0.5527, + "step": 97630 + }, + { + "epoch": 0.98, + "learning_rate": 1.931052631578947e-05, + "loss": 0.5406, + "step": 97640 + }, + { + "epoch": 0.98, + "learning_rate": 1.923157894736842e-05, + "loss": 0.5477, + "step": 97650 + }, + { + "epoch": 0.98, + "learning_rate": 1.9152631578947367e-05, + "loss": 0.5427, + "step": 97660 + }, + { + "epoch": 0.98, + "learning_rate": 1.9073684210526315e-05, + "loss": 0.547, + "step": 97670 + }, + { + "epoch": 0.98, + "learning_rate": 1.8994736842105263e-05, + "loss": 0.5386, + "step": 97680 + }, + { + "epoch": 0.98, + "learning_rate": 1.891578947368421e-05, + "loss": 0.546, + "step": 97690 + }, + { + "epoch": 0.98, + "learning_rate": 1.8836842105263153e-05, + "loss": 0.5366, + "step": 97700 + }, + { + "epoch": 0.98, + "learning_rate": 1.87578947368421e-05, + "loss": 0.546, + "step": 97710 + }, + { + "epoch": 0.98, + "learning_rate": 1.867894736842105e-05, + "loss": 0.5417, + "step": 97720 + }, + { + "epoch": 0.98, + "learning_rate": 1.8599999999999998e-05, + "loss": 0.5293, + "step": 97730 + }, + { + "epoch": 0.98, + "learning_rate": 1.8521052631578946e-05, + "loss": 0.5449, + "step": 97740 + }, + { + "epoch": 0.98, + "learning_rate": 1.844210526315789e-05, + "loss": 0.5507, + "step": 97750 + }, + { + "epoch": 0.98, + "learning_rate": 1.836315789473684e-05, + "loss": 0.5582, + "step": 97760 + }, + { + "epoch": 0.98, + "learning_rate": 1.8284210526315788e-05, + "loss": 0.5668, + "step": 97770 + }, + { + "epoch": 0.98, + "learning_rate": 1.8205263157894736e-05, + "loss": 0.5599, + "step": 97780 + }, + { + "epoch": 0.98, + "learning_rate": 1.812631578947368e-05, + "loss": 0.5694, + "step": 97790 + }, + { + "epoch": 0.98, + "learning_rate": 1.804736842105263e-05, + "loss": 0.5687, + "step": 97800 + }, + { + "epoch": 0.98, + "learning_rate": 1.7968421052631578e-05, + "loss": 0.5715, + "step": 97810 + }, + { + "epoch": 0.98, + "learning_rate": 1.7889473684210526e-05, + "loss": 0.5682, + "step": 97820 + }, + { + "epoch": 0.98, + "learning_rate": 1.781052631578947e-05, + "loss": 0.5604, + "step": 97830 + }, + { + "epoch": 0.98, + "learning_rate": 1.773157894736842e-05, + "loss": 0.5544, + "step": 97840 + }, + { + "epoch": 0.98, + "learning_rate": 1.7652631578947368e-05, + "loss": 0.562, + "step": 97850 + }, + { + "epoch": 0.98, + "learning_rate": 1.7573684210526312e-05, + "loss": 0.5514, + "step": 97860 + }, + { + "epoch": 0.98, + "learning_rate": 1.749473684210526e-05, + "loss": 0.5468, + "step": 97870 + }, + { + "epoch": 0.98, + "learning_rate": 1.741578947368421e-05, + "loss": 0.5509, + "step": 97880 + }, + { + "epoch": 0.98, + "learning_rate": 1.7336842105263157e-05, + "loss": 0.5626, + "step": 97890 + }, + { + "epoch": 0.98, + "learning_rate": 1.7257894736842102e-05, + "loss": 0.5478, + "step": 97900 + }, + { + "epoch": 0.98, + "learning_rate": 1.717894736842105e-05, + "loss": 0.5339, + "step": 97910 + }, + { + "epoch": 0.98, + "learning_rate": 1.71e-05, + "loss": 0.5395, + "step": 97920 + }, + { + "epoch": 0.98, + "learning_rate": 1.7021052631578944e-05, + "loss": 0.5329, + "step": 97930 + }, + { + "epoch": 0.98, + "learning_rate": 1.6942105263157892e-05, + "loss": 0.5265, + "step": 97940 + }, + { + "epoch": 0.98, + "learning_rate": 1.686315789473684e-05, + "loss": 0.5336, + "step": 97950 + }, + { + "epoch": 0.98, + "learning_rate": 1.678421052631579e-05, + "loss": 0.5251, + "step": 97960 + }, + { + "epoch": 0.98, + "learning_rate": 1.6705263157894734e-05, + "loss": 0.5393, + "step": 97970 + }, + { + "epoch": 0.98, + "learning_rate": 1.6626315789473682e-05, + "loss": 0.5467, + "step": 97980 + }, + { + "epoch": 0.98, + "learning_rate": 1.654736842105263e-05, + "loss": 0.5554, + "step": 97990 + }, + { + "epoch": 0.98, + "learning_rate": 1.6468421052631575e-05, + "loss": 0.5688, + "step": 98000 + }, + { + "epoch": 0.98, + "learning_rate": 1.6389473684210523e-05, + "loss": 0.5638, + "step": 98010 + }, + { + "epoch": 0.98, + "learning_rate": 1.6310526315789472e-05, + "loss": 0.5665, + "step": 98020 + }, + { + "epoch": 0.98, + "learning_rate": 1.623157894736842e-05, + "loss": 0.5707, + "step": 98030 + }, + { + "epoch": 0.98, + "learning_rate": 1.6152631578947365e-05, + "loss": 0.5603, + "step": 98040 + }, + { + "epoch": 0.98, + "learning_rate": 1.6073684210526313e-05, + "loss": 0.5643, + "step": 98050 + }, + { + "epoch": 0.98, + "learning_rate": 1.599473684210526e-05, + "loss": 0.5695, + "step": 98060 + }, + { + "epoch": 0.98, + "learning_rate": 1.5915789473684207e-05, + "loss": 0.5653, + "step": 98070 + }, + { + "epoch": 0.98, + "learning_rate": 1.5836842105263155e-05, + "loss": 0.5578, + "step": 98080 + }, + { + "epoch": 0.98, + "learning_rate": 1.5757894736842103e-05, + "loss": 0.5433, + "step": 98090 + }, + { + "epoch": 0.98, + "learning_rate": 1.567894736842105e-05, + "loss": 0.5342, + "step": 98100 + }, + { + "epoch": 0.98, + "learning_rate": 1.5599999999999996e-05, + "loss": 0.5509, + "step": 98110 + }, + { + "epoch": 0.98, + "learning_rate": 1.552894736842105e-05, + "loss": 0.5387, + "step": 98120 + }, + { + "epoch": 0.98, + "learning_rate": 1.545e-05, + "loss": 0.5577, + "step": 98130 + }, + { + "epoch": 0.98, + "learning_rate": 1.5371052631578944e-05, + "loss": 0.5427, + "step": 98140 + }, + { + "epoch": 0.98, + "learning_rate": 1.5292105263157893e-05, + "loss": 0.5401, + "step": 98150 + }, + { + "epoch": 0.98, + "learning_rate": 1.5213157894736841e-05, + "loss": 0.5331, + "step": 98160 + }, + { + "epoch": 0.98, + "learning_rate": 1.5134210526315788e-05, + "loss": 0.5166, + "step": 98170 + }, + { + "epoch": 0.98, + "learning_rate": 1.5055263157894736e-05, + "loss": 0.5319, + "step": 98180 + }, + { + "epoch": 0.98, + "learning_rate": 1.4976315789473684e-05, + "loss": 0.5298, + "step": 98190 + }, + { + "epoch": 0.98, + "learning_rate": 1.4897368421052629e-05, + "loss": 0.5225, + "step": 98200 + }, + { + "epoch": 0.98, + "learning_rate": 1.4818421052631577e-05, + "loss": 0.5472, + "step": 98210 + }, + { + "epoch": 0.98, + "learning_rate": 1.4739473684210526e-05, + "loss": 0.5575, + "step": 98220 + }, + { + "epoch": 0.98, + "learning_rate": 1.4660526315789474e-05, + "loss": 0.5471, + "step": 98230 + }, + { + "epoch": 0.98, + "learning_rate": 1.4581578947368419e-05, + "loss": 0.565, + "step": 98240 + }, + { + "epoch": 0.98, + "learning_rate": 1.4502631578947367e-05, + "loss": 0.5484, + "step": 98250 + }, + { + "epoch": 0.98, + "learning_rate": 1.4423684210526316e-05, + "loss": 0.5682, + "step": 98260 + }, + { + "epoch": 0.98, + "learning_rate": 1.434473684210526e-05, + "loss": 0.5639, + "step": 98270 + }, + { + "epoch": 0.98, + "learning_rate": 1.4265789473684209e-05, + "loss": 0.5628, + "step": 98280 + }, + { + "epoch": 0.98, + "learning_rate": 1.4186842105263157e-05, + "loss": 0.5645, + "step": 98290 + }, + { + "epoch": 0.98, + "learning_rate": 1.4107894736842105e-05, + "loss": 0.558, + "step": 98300 + }, + { + "epoch": 0.98, + "learning_rate": 1.402894736842105e-05, + "loss": 0.5468, + "step": 98310 + }, + { + "epoch": 0.98, + "learning_rate": 1.3949999999999999e-05, + "loss": 0.555, + "step": 98320 + }, + { + "epoch": 0.98, + "learning_rate": 1.3871052631578947e-05, + "loss": 0.5515, + "step": 98330 + }, + { + "epoch": 0.98, + "learning_rate": 1.3792105263157892e-05, + "loss": 0.5408, + "step": 98340 + }, + { + "epoch": 0.98, + "learning_rate": 1.371315789473684e-05, + "loss": 0.5386, + "step": 98350 + }, + { + "epoch": 0.98, + "learning_rate": 1.3634210526315788e-05, + "loss": 0.54, + "step": 98360 + }, + { + "epoch": 0.98, + "learning_rate": 1.3555263157894737e-05, + "loss": 0.5423, + "step": 98370 + }, + { + "epoch": 0.98, + "learning_rate": 1.3476315789473682e-05, + "loss": 0.5279, + "step": 98380 + }, + { + "epoch": 0.98, + "learning_rate": 1.339736842105263e-05, + "loss": 0.537, + "step": 98390 + }, + { + "epoch": 0.98, + "learning_rate": 1.3318421052631578e-05, + "loss": 0.536, + "step": 98400 + }, + { + "epoch": 0.98, + "learning_rate": 1.3239473684210527e-05, + "loss": 0.5422, + "step": 98410 + }, + { + "epoch": 0.98, + "learning_rate": 1.3160526315789471e-05, + "loss": 0.544, + "step": 98420 + }, + { + "epoch": 0.98, + "learning_rate": 1.308157894736842e-05, + "loss": 0.5422, + "step": 98430 + }, + { + "epoch": 0.98, + "learning_rate": 1.3002631578947368e-05, + "loss": 0.5319, + "step": 98440 + }, + { + "epoch": 0.98, + "learning_rate": 1.2923684210526315e-05, + "loss": 0.5468, + "step": 98450 + }, + { + "epoch": 0.98, + "learning_rate": 1.2844736842105261e-05, + "loss": 0.5449, + "step": 98460 + }, + { + "epoch": 0.98, + "learning_rate": 1.276578947368421e-05, + "loss": 0.5569, + "step": 98470 + }, + { + "epoch": 0.98, + "learning_rate": 1.2686842105263158e-05, + "loss": 0.5599, + "step": 98480 + }, + { + "epoch": 0.98, + "learning_rate": 1.2607894736842104e-05, + "loss": 0.5608, + "step": 98490 + }, + { + "epoch": 0.98, + "learning_rate": 1.2528947368421051e-05, + "loss": 0.5747, + "step": 98500 + }, + { + "epoch": 0.99, + "learning_rate": 1.245e-05, + "loss": 0.5713, + "step": 98510 + }, + { + "epoch": 0.99, + "learning_rate": 1.2371052631578946e-05, + "loss": 0.5587, + "step": 98520 + }, + { + "epoch": 0.99, + "learning_rate": 1.2292105263157893e-05, + "loss": 0.568, + "step": 98530 + }, + { + "epoch": 0.99, + "learning_rate": 1.2213157894736841e-05, + "loss": 0.5496, + "step": 98540 + }, + { + "epoch": 0.99, + "learning_rate": 1.213421052631579e-05, + "loss": 0.5667, + "step": 98550 + }, + { + "epoch": 0.99, + "learning_rate": 1.2055263157894736e-05, + "loss": 0.5402, + "step": 98560 + }, + { + "epoch": 0.99, + "learning_rate": 1.1976315789473682e-05, + "loss": 0.5294, + "step": 98570 + }, + { + "epoch": 0.99, + "learning_rate": 1.189736842105263e-05, + "loss": 0.5306, + "step": 98580 + }, + { + "epoch": 0.99, + "learning_rate": 1.1818421052631577e-05, + "loss": 0.5317, + "step": 98590 + }, + { + "epoch": 0.99, + "learning_rate": 1.1739473684210526e-05, + "loss": 0.5424, + "step": 98600 + }, + { + "epoch": 0.99, + "learning_rate": 1.1660526315789472e-05, + "loss": 0.5175, + "step": 98610 + }, + { + "epoch": 0.99, + "learning_rate": 1.158157894736842e-05, + "loss": 0.5112, + "step": 98620 + }, + { + "epoch": 0.99, + "learning_rate": 1.1502631578947367e-05, + "loss": 0.5215, + "step": 98630 + }, + { + "epoch": 0.99, + "learning_rate": 1.1423684210526315e-05, + "loss": 0.5233, + "step": 98640 + }, + { + "epoch": 0.99, + "learning_rate": 1.1344736842105262e-05, + "loss": 0.5117, + "step": 98650 + }, + { + "epoch": 0.99, + "learning_rate": 1.1265789473684209e-05, + "loss": 0.5186, + "step": 98660 + }, + { + "epoch": 0.99, + "learning_rate": 1.1186842105263157e-05, + "loss": 0.5214, + "step": 98670 + }, + { + "epoch": 0.99, + "learning_rate": 1.1107894736842105e-05, + "loss": 0.5307, + "step": 98680 + }, + { + "epoch": 0.99, + "learning_rate": 1.1028947368421052e-05, + "loss": 0.5245, + "step": 98690 + }, + { + "epoch": 0.99, + "learning_rate": 1.0949999999999998e-05, + "loss": 0.5476, + "step": 98700 + }, + { + "epoch": 0.99, + "learning_rate": 1.0871052631578947e-05, + "loss": 0.5443, + "step": 98710 + }, + { + "epoch": 0.99, + "learning_rate": 1.0792105263157895e-05, + "loss": 0.5479, + "step": 98720 + }, + { + "epoch": 0.99, + "learning_rate": 1.071315789473684e-05, + "loss": 0.5547, + "step": 98730 + }, + { + "epoch": 0.99, + "learning_rate": 1.0634210526315788e-05, + "loss": 0.5645, + "step": 98740 + }, + { + "epoch": 0.99, + "learning_rate": 1.0555263157894737e-05, + "loss": 0.5748, + "step": 98750 + }, + { + "epoch": 0.99, + "learning_rate": 1.0476315789473683e-05, + "loss": 0.5618, + "step": 98760 + }, + { + "epoch": 0.99, + "learning_rate": 1.039736842105263e-05, + "loss": 0.5506, + "step": 98770 + }, + { + "epoch": 0.99, + "learning_rate": 1.0318421052631578e-05, + "loss": 0.529, + "step": 98780 + }, + { + "epoch": 0.99, + "learning_rate": 1.0239473684210526e-05, + "loss": 0.536, + "step": 98790 + }, + { + "epoch": 0.99, + "learning_rate": 1.0160526315789473e-05, + "loss": 0.5472, + "step": 98800 + }, + { + "epoch": 0.99, + "learning_rate": 1.008157894736842e-05, + "loss": 0.5439, + "step": 98810 + }, + { + "epoch": 0.99, + "learning_rate": 1.0002631578947368e-05, + "loss": 0.5529, + "step": 98820 + }, + { + "epoch": 0.99, + "learning_rate": 9.923684210526316e-06, + "loss": 0.5439, + "step": 98830 + }, + { + "epoch": 0.99, + "learning_rate": 9.844736842105261e-06, + "loss": 0.533, + "step": 98840 + }, + { + "epoch": 0.99, + "learning_rate": 9.76578947368421e-06, + "loss": 0.5391, + "step": 98850 + }, + { + "epoch": 0.99, + "learning_rate": 9.686842105263158e-06, + "loss": 0.5274, + "step": 98860 + }, + { + "epoch": 0.99, + "learning_rate": 9.607894736842106e-06, + "loss": 0.5355, + "step": 98870 + }, + { + "epoch": 0.99, + "learning_rate": 9.528947368421051e-06, + "loss": 0.5272, + "step": 98880 + }, + { + "epoch": 0.99, + "learning_rate": 9.45e-06, + "loss": 0.5291, + "step": 98890 + }, + { + "epoch": 0.99, + "learning_rate": 9.371052631578946e-06, + "loss": 0.5353, + "step": 98900 + }, + { + "epoch": 0.99, + "learning_rate": 9.292105263157894e-06, + "loss": 0.5427, + "step": 98910 + }, + { + "epoch": 0.99, + "learning_rate": 9.21315789473684e-06, + "loss": 0.5431, + "step": 98920 + }, + { + "epoch": 0.99, + "learning_rate": 9.134210526315789e-06, + "loss": 0.5431, + "step": 98930 + }, + { + "epoch": 0.99, + "learning_rate": 9.055263157894736e-06, + "loss": 0.5569, + "step": 98940 + }, + { + "epoch": 0.99, + "learning_rate": 8.976315789473684e-06, + "loss": 0.563, + "step": 98950 + }, + { + "epoch": 0.99, + "learning_rate": 8.89736842105263e-06, + "loss": 0.5641, + "step": 98960 + }, + { + "epoch": 0.99, + "learning_rate": 8.818421052631577e-06, + "loss": 0.5705, + "step": 98970 + }, + { + "epoch": 0.99, + "learning_rate": 8.74736842105263e-06, + "loss": 0.5701, + "step": 98980 + }, + { + "epoch": 0.99, + "learning_rate": 8.668421052631579e-06, + "loss": 0.5524, + "step": 98990 + }, + { + "epoch": 0.99, + "learning_rate": 8.589473684210525e-06, + "loss": 0.5472, + "step": 99000 + }, + { + "epoch": 0.99, + "learning_rate": 8.510526315789472e-06, + "loss": 0.5477, + "step": 99010 + }, + { + "epoch": 0.99, + "learning_rate": 8.43157894736842e-06, + "loss": 0.5383, + "step": 99020 + }, + { + "epoch": 0.99, + "learning_rate": 8.352631578947367e-06, + "loss": 0.5362, + "step": 99030 + }, + { + "epoch": 0.99, + "learning_rate": 8.273684210526315e-06, + "loss": 0.5428, + "step": 99040 + }, + { + "epoch": 0.99, + "learning_rate": 8.194736842105262e-06, + "loss": 0.5357, + "step": 99050 + }, + { + "epoch": 0.99, + "learning_rate": 8.11578947368421e-06, + "loss": 0.5365, + "step": 99060 + }, + { + "epoch": 0.99, + "learning_rate": 8.036842105263157e-06, + "loss": 0.5419, + "step": 99070 + }, + { + "epoch": 0.99, + "learning_rate": 7.957894736842103e-06, + "loss": 0.5353, + "step": 99080 + }, + { + "epoch": 0.99, + "learning_rate": 7.878947368421052e-06, + "loss": 0.5401, + "step": 99090 + }, + { + "epoch": 0.99, + "learning_rate": 7.799999999999998e-06, + "loss": 0.5318, + "step": 99100 + }, + { + "epoch": 0.99, + "learning_rate": 7.721052631578946e-06, + "loss": 0.5423, + "step": 99110 + }, + { + "epoch": 0.99, + "learning_rate": 7.642105263157893e-06, + "loss": 0.5223, + "step": 99120 + }, + { + "epoch": 0.99, + "learning_rate": 7.563157894736842e-06, + "loss": 0.5314, + "step": 99130 + }, + { + "epoch": 0.99, + "learning_rate": 7.484210526315789e-06, + "loss": 0.5375, + "step": 99140 + }, + { + "epoch": 0.99, + "learning_rate": 7.405263157894736e-06, + "loss": 0.5475, + "step": 99150 + }, + { + "epoch": 0.99, + "learning_rate": 7.326315789473684e-06, + "loss": 0.5481, + "step": 99160 + }, + { + "epoch": 0.99, + "learning_rate": 7.24736842105263e-06, + "loss": 0.5519, + "step": 99170 + }, + { + "epoch": 0.99, + "learning_rate": 7.168421052631579e-06, + "loss": 0.5675, + "step": 99180 + }, + { + "epoch": 0.99, + "learning_rate": 7.089473684210525e-06, + "loss": 0.5612, + "step": 99190 + }, + { + "epoch": 0.99, + "learning_rate": 7.0105263157894736e-06, + "loss": 0.5572, + "step": 99200 + }, + { + "epoch": 0.99, + "learning_rate": 6.93157894736842e-06, + "loss": 0.5683, + "step": 99210 + }, + { + "epoch": 0.99, + "learning_rate": 6.8526315789473685e-06, + "loss": 0.5542, + "step": 99220 + }, + { + "epoch": 0.99, + "learning_rate": 6.773684210526315e-06, + "loss": 0.5614, + "step": 99230 + }, + { + "epoch": 0.99, + "learning_rate": 6.694736842105262e-06, + "loss": 0.5456, + "step": 99240 + }, + { + "epoch": 0.99, + "learning_rate": 6.61578947368421e-06, + "loss": 0.5442, + "step": 99250 + }, + { + "epoch": 0.99, + "learning_rate": 6.536842105263157e-06, + "loss": 0.5416, + "step": 99260 + }, + { + "epoch": 0.99, + "learning_rate": 6.457894736842105e-06, + "loss": 0.5463, + "step": 99270 + }, + { + "epoch": 0.99, + "learning_rate": 6.3789473684210515e-06, + "loss": 0.5546, + "step": 99280 + }, + { + "epoch": 0.99, + "learning_rate": 6.3e-06, + "loss": 0.5339, + "step": 99290 + }, + { + "epoch": 0.99, + "learning_rate": 6.2210526315789464e-06, + "loss": 0.5405, + "step": 99300 + }, + { + "epoch": 0.99, + "learning_rate": 6.142105263157894e-06, + "loss": 0.533, + "step": 99310 + }, + { + "epoch": 0.99, + "learning_rate": 6.063157894736841e-06, + "loss": 0.5315, + "step": 99320 + }, + { + "epoch": 0.99, + "learning_rate": 5.984210526315789e-06, + "loss": 0.541, + "step": 99330 + }, + { + "epoch": 0.99, + "learning_rate": 5.905263157894736e-06, + "loss": 0.5386, + "step": 99340 + }, + { + "epoch": 0.99, + "learning_rate": 5.826315789473684e-06, + "loss": 0.5231, + "step": 99350 + }, + { + "epoch": 0.99, + "learning_rate": 5.747368421052631e-06, + "loss": 0.5433, + "step": 99360 + }, + { + "epoch": 0.99, + "learning_rate": 5.668421052631579e-06, + "loss": 0.5502, + "step": 99370 + }, + { + "epoch": 0.99, + "learning_rate": 5.589473684210526e-06, + "loss": 0.5589, + "step": 99380 + }, + { + "epoch": 0.99, + "learning_rate": 5.5105263157894735e-06, + "loss": 0.5584, + "step": 99390 + }, + { + "epoch": 0.99, + "learning_rate": 5.43157894736842e-06, + "loss": 0.5622, + "step": 99400 + }, + { + "epoch": 0.99, + "learning_rate": 5.3526315789473684e-06, + "loss": 0.562, + "step": 99410 + }, + { + "epoch": 0.99, + "learning_rate": 5.273684210526315e-06, + "loss": 0.572, + "step": 99420 + }, + { + "epoch": 0.99, + "learning_rate": 5.194736842105263e-06, + "loss": 0.5653, + "step": 99430 + }, + { + "epoch": 0.99, + "learning_rate": 5.11578947368421e-06, + "loss": 0.5621, + "step": 99440 + }, + { + "epoch": 0.99, + "learning_rate": 5.036842105263158e-06, + "loss": 0.5488, + "step": 99450 + }, + { + "epoch": 0.99, + "learning_rate": 4.957894736842105e-06, + "loss": 0.5379, + "step": 99460 + }, + { + "epoch": 0.99, + "learning_rate": 4.8789473684210515e-06, + "loss": 0.5541, + "step": 99470 + }, + { + "epoch": 0.99, + "learning_rate": 4.8e-06, + "loss": 0.548, + "step": 99480 + }, + { + "epoch": 0.99, + "learning_rate": 4.721052631578946e-06, + "loss": 0.5427, + "step": 99490 + }, + { + "epoch": 0.99, + "learning_rate": 4.642105263157895e-06, + "loss": 0.5592, + "step": 99500 + }, + { + "epoch": 1.0, + "learning_rate": 4.563157894736841e-06, + "loss": 0.5521, + "step": 99510 + }, + { + "epoch": 1.0, + "learning_rate": 4.484210526315789e-06, + "loss": 0.5467, + "step": 99520 + }, + { + "epoch": 1.0, + "learning_rate": 4.405263157894736e-06, + "loss": 0.5407, + "step": 99530 + }, + { + "epoch": 1.0, + "learning_rate": 4.326315789473684e-06, + "loss": 0.5413, + "step": 99540 + }, + { + "epoch": 1.0, + "learning_rate": 4.247368421052631e-06, + "loss": 0.5337, + "step": 99550 + }, + { + "epoch": 1.0, + "learning_rate": 4.168421052631579e-06, + "loss": 0.5315, + "step": 99560 + }, + { + "epoch": 1.0, + "learning_rate": 4.089473684210526e-06, + "loss": 0.5279, + "step": 99570 + }, + { + "epoch": 1.0, + "learning_rate": 4.0105263157894735e-06, + "loss": 0.5294, + "step": 99580 + }, + { + "epoch": 1.0, + "learning_rate": 3.93157894736842e-06, + "loss": 0.5355, + "step": 99590 + }, + { + "epoch": 1.0, + "learning_rate": 3.8526315789473676e-06, + "loss": 0.5406, + "step": 99600 + }, + { + "epoch": 1.0, + "learning_rate": 3.7736842105263154e-06, + "loss": 0.5498, + "step": 99610 + }, + { + "epoch": 1.0, + "learning_rate": 3.694736842105263e-06, + "loss": 0.55, + "step": 99620 + }, + { + "epoch": 1.0, + "learning_rate": 3.6157894736842103e-06, + "loss": 0.5627, + "step": 99630 + }, + { + "epoch": 1.0, + "learning_rate": 3.536842105263158e-06, + "loss": 0.5577, + "step": 99640 + }, + { + "epoch": 1.0, + "learning_rate": 3.4578947368421053e-06, + "loss": 0.5669, + "step": 99650 + }, + { + "epoch": 1.0, + "learning_rate": 3.378947368421052e-06, + "loss": 0.5615, + "step": 99660 + }, + { + "epoch": 1.0, + "learning_rate": 3.2999999999999993e-06, + "loss": 0.5618, + "step": 99670 + }, + { + "epoch": 1.0, + "learning_rate": 3.2210526315789468e-06, + "loss": 0.551, + "step": 99680 + }, + { + "epoch": 1.0, + "learning_rate": 3.1421052631578942e-06, + "loss": 0.5513, + "step": 99690 + }, + { + "epoch": 1.0, + "learning_rate": 3.0631578947368417e-06, + "loss": 0.5531, + "step": 99700 + }, + { + "epoch": 1.0, + "learning_rate": 2.984210526315789e-06, + "loss": 0.5431, + "step": 99710 + }, + { + "epoch": 1.0, + "learning_rate": 2.9052631578947366e-06, + "loss": 0.537, + "step": 99720 + }, + { + "epoch": 1.0, + "learning_rate": 2.826315789473684e-06, + "loss": 0.5366, + "step": 99730 + }, + { + "epoch": 1.0, + "learning_rate": 2.747368421052631e-06, + "loss": 0.5352, + "step": 99740 + }, + { + "epoch": 1.0, + "learning_rate": 2.6684210526315785e-06, + "loss": 0.5337, + "step": 99750 + }, + { + "epoch": 1.0, + "learning_rate": 2.589473684210526e-06, + "loss": 0.5264, + "step": 99760 + }, + { + "epoch": 1.0, + "learning_rate": 2.5105263157894735e-06, + "loss": 0.5306, + "step": 99770 + }, + { + "epoch": 1.0, + "learning_rate": 2.431578947368421e-06, + "loss": 0.537, + "step": 99780 + }, + { + "epoch": 1.0, + "learning_rate": 2.3526315789473684e-06, + "loss": 0.5183, + "step": 99790 + }, + { + "epoch": 1.0, + "learning_rate": 2.2736842105263154e-06, + "loss": 0.525, + "step": 99800 + }, + { + "epoch": 1.0, + "learning_rate": 2.194736842105263e-06, + "loss": 0.5179, + "step": 99810 + }, + { + "epoch": 1.0, + "learning_rate": 2.1157894736842103e-06, + "loss": 0.525, + "step": 99820 + }, + { + "epoch": 1.0, + "learning_rate": 2.0368421052631578e-06, + "loss": 0.5342, + "step": 99830 + }, + { + "epoch": 1.0, + "learning_rate": 1.957894736842105e-06, + "loss": 0.553, + "step": 99840 + }, + { + "epoch": 1.0, + "learning_rate": 1.8789473684210525e-06, + "loss": 0.5481, + "step": 99850 + }, + { + "epoch": 1.0, + "learning_rate": 1.8e-06, + "loss": 0.5443, + "step": 99860 + }, + { + "epoch": 1.0, + "learning_rate": 1.7210526315789474e-06, + "loss": 0.5509, + "step": 99870 + }, + { + "epoch": 1.0, + "learning_rate": 1.6421052631578944e-06, + "loss": 0.5453, + "step": 99880 + }, + { + "epoch": 1.0, + "learning_rate": 1.5631578947368419e-06, + "loss": 0.5505, + "step": 99890 + }, + { + "epoch": 1.0, + "learning_rate": 1.4842105263157893e-06, + "loss": 0.5574, + "step": 99900 + }, + { + "epoch": 1.0, + "learning_rate": 1.4052631578947368e-06, + "loss": 0.5573, + "step": 99910 + }, + { + "epoch": 1.0, + "learning_rate": 1.326315789473684e-06, + "loss": 0.553, + "step": 99920 + }, + { + "epoch": 1.0, + "learning_rate": 1.2473684210526315e-06, + "loss": 0.5504, + "step": 99930 + }, + { + "epoch": 1.0, + "learning_rate": 1.168421052631579e-06, + "loss": 0.5388, + "step": 99940 + }, + { + "epoch": 1.0, + "learning_rate": 1.0894736842105262e-06, + "loss": 0.534, + "step": 99950 + }, + { + "epoch": 1.0, + "learning_rate": 1.0105263157894736e-06, + "loss": 0.5417, + "step": 99960 + }, + { + "epoch": 1.0, + "learning_rate": 9.31578947368421e-07, + "loss": 0.5396, + "step": 99970 + }, + { + "epoch": 1.0, + "learning_rate": 8.526315789473684e-07, + "loss": 0.5378, + "step": 99980 + }, + { + "epoch": 1.0, + "learning_rate": 7.736842105263157e-07, + "loss": 0.5274, + "step": 99990 + }, + { + "epoch": 1.0, + "learning_rate": 6.947368421052631e-07, + "loss": 0.532, + "step": 100000 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.8854931609881757, + "eval_loss": 0.51953125, + "eval_runtime": 99.2784, + "eval_samples_per_second": 805.815, + "eval_steps_per_second": 1.581, + "step": 100000 + }, + { + "epoch": 1.0, + "step": 100000, + "total_flos": 3.58736203874304e+19, + "train_loss": 0.8820991159725189, + "train_runtime": 81002.545, + "train_samples_per_second": 1264.158, + "train_steps_per_second": 1.235 + } + ], + "logging_steps": 10, + "max_steps": 100000, + "num_train_epochs": 9223372036854775807, + "save_steps": 2500, + "total_flos": 3.58736203874304e+19, + "trial_name": null, + "trial_params": null +}