epoch: 1, lr: 3.00e-03, datapoints_seen: 167808 - train loss: 4.13 epoch: 2, lr: 5.99e-03, datapoints_seen: 335616 - train loss: 3.95 epoch: 3, lr: 8.99e-03, datapoints_seen: 503424 - train loss: 3.90 epoch: 4, lr: 1.20e-02, datapoints_seen: 671232 - train loss: 3.87 epoch: 5, lr: 1.50e-02, datapoints_seen: 839040 - train loss: 3.85 epoch: 6, lr: 1.50e-02, datapoints_seen: 1006848 - train loss: 3.84 epoch: 7, lr: 1.49e-02, datapoints_seen: 1174656 - train loss: 3.82 epoch: 8, lr: 1.48e-02, datapoints_seen: 1342464 - train loss: 3.82 epoch: 9, lr: 1.47e-02, datapoints_seen: 1510272 - train loss: 3.81 epoch: 10, lr: 1.45e-02, datapoints_seen: 1678080 - train loss: 3.80 epoch: 11, lr: 1.44e-02, datapoints_seen: 1845888 - train loss: 3.80 epoch: 12, lr: 1.41e-02, datapoints_seen: 2013696 - train loss: 3.80 epoch: 13, lr: 1.39e-02, datapoints_seen: 2181504 - train loss: 3.79 epoch: 14, lr: 1.36e-02, datapoints_seen: 2349312 - train loss: 3.79 epoch: 15, lr: 1.32e-02, datapoints_seen: 2517120 - train loss: 3.79 epoch: 16, lr: 1.29e-02, datapoints_seen: 2684928 - train loss: 3.79 epoch: 17, lr: 1.25e-02, datapoints_seen: 2852736 - train loss: 3.78 epoch: 18, lr: 1.21e-02, datapoints_seen: 3020544 - train loss: 3.78 epoch: 19, lr: 1.17e-02, datapoints_seen: 3188352 - train loss: 3.78 epoch: 20, lr: 1.13e-02, datapoints_seen: 3356160 - train loss: 3.78 epoch: 21, lr: 1.08e-02, datapoints_seen: 3523968 - train loss: 3.78 epoch: 22, lr: 1.03e-02, datapoints_seen: 3691776 - train loss: 3.77 epoch: 23, lr: 9.83e-03, datapoints_seen: 3859584 - train loss: 3.77 epoch: 24, lr: 9.32e-03, datapoints_seen: 4027392 - train loss: 3.77 epoch: 25, lr: 8.81e-03, datapoints_seen: 4195200 - train loss: 3.77 epoch: 26, lr: 8.29e-03, datapoints_seen: 4363008 - train loss: 3.77 epoch: 27, lr: 7.77e-03, datapoints_seen: 4530816 - train loss: 3.76 epoch: 28, lr: 7.25e-03, datapoints_seen: 4698624 - train loss: 3.76 epoch: 29, lr: 6.73e-03, datapoints_seen: 4866432 - train loss: 3.76 epoch: 30, lr: 6.21e-03, datapoints_seen: 5034240 - train loss: 3.76 epoch: 31, lr: 5.70e-03, datapoints_seen: 5202048 - train loss: 3.76 epoch: 32, lr: 5.19e-03, datapoints_seen: 5369856 - train loss: 3.75 epoch: 33, lr: 4.70e-03, datapoints_seen: 5537664 - train loss: 3.75 epoch: 34, lr: 4.22e-03, datapoints_seen: 5705472 - train loss: 3.75 epoch: 35, lr: 3.76e-03, datapoints_seen: 5873280 - train loss: 3.75 epoch: 36, lr: 3.32e-03, datapoints_seen: 6041088 - train loss: 3.75 epoch: 37, lr: 2.89e-03, datapoints_seen: 6208896 - train loss: 3.74 epoch: 38, lr: 2.49e-03, datapoints_seen: 6376704 - train loss: 3.74 epoch: 39, lr: 2.12e-03, datapoints_seen: 6544512 - train loss: 3.74 epoch: 40, lr: 1.77e-03, datapoints_seen: 6712320 - train loss: 3.74 epoch: 41, lr: 1.44e-03, datapoints_seen: 6880128 - train loss: 3.73 epoch: 42, lr: 1.15e-03, datapoints_seen: 7047936 - train loss: 3.73 epoch: 43, lr: 8.86e-04, datapoints_seen: 7215744 - train loss: 3.73 epoch: 44, lr: 6.56e-04, datapoints_seen: 7383552 - train loss: 3.73 epoch: 45, lr: 4.59e-04, datapoints_seen: 7551360 - train loss: 3.73 epoch: 46, lr: 2.96e-04, datapoints_seen: 7719168 - train loss: 3.73 epoch: 47, lr: 1.68e-04, datapoints_seen: 7886976 - train loss: 3.73 epoch: 48, lr: 7.57e-05, datapoints_seen: 8054784 - train loss: 3.73 epoch: 49, lr: 1.97e-05, datapoints_seen: 8222592 - train loss: 3.72 epoch: 50, lr: 3.26e-08, datapoints_seen: 8390400 - train loss: 3.72