|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.93374682830561, |
|
"eval_steps": 500, |
|
"global_step": 17500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.93752581844851e-06, |
|
"loss": 3.0868, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.42386919416686e-06, |
|
"loss": 3.9106, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.293324332157391e-06, |
|
"loss": 3.2471, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.910212569885209e-06, |
|
"loss": 2.2106, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.388708261178669e-06, |
|
"loss": 1.3444, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 8.77966770787574e-06, |
|
"loss": 1.1874, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.110219210139135e-06, |
|
"loss": 0.7707, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.396555945603558e-06, |
|
"loss": 1.07, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.649122845866272e-06, |
|
"loss": 0.8138, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.87505163689702e-06, |
|
"loss": 0.7951, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.997152349311819e-06, |
|
"loss": 1.8935, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.98766018035121e-06, |
|
"loss": 1.1202, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.978168011390604e-06, |
|
"loss": 0.8436, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.968675842429996e-06, |
|
"loss": 1.6259, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.959183673469387e-06, |
|
"loss": 0.688, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.94969150450878e-06, |
|
"loss": 1.2987, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.940199335548174e-06, |
|
"loss": 1.1667, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.930707166587566e-06, |
|
"loss": 0.9948, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.92121499762696e-06, |
|
"loss": 1.1458, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.911722828666351e-06, |
|
"loss": 1.2454, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.902230659705745e-06, |
|
"loss": 0.8821, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.892738490745136e-06, |
|
"loss": 0.7736, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.883246321784528e-06, |
|
"loss": 1.0794, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.873754152823922e-06, |
|
"loss": 1.4458, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.864261983863313e-06, |
|
"loss": 0.9957, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.854769814902707e-06, |
|
"loss": 0.6101, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.845277645942099e-06, |
|
"loss": 0.6848, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.83578547698149e-06, |
|
"loss": 1.6407, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.826293308020884e-06, |
|
"loss": 0.8201, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.816801139060275e-06, |
|
"loss": 0.9695, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.807308970099669e-06, |
|
"loss": 1.131, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.79781680113906e-06, |
|
"loss": 0.8368, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.788324632178452e-06, |
|
"loss": 1.0931, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.778832463217846e-06, |
|
"loss": 0.617, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.76934029425724e-06, |
|
"loss": 0.3638, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.759848125296631e-06, |
|
"loss": 1.0894, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.750355956336024e-06, |
|
"loss": 1.2748, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.740863787375416e-06, |
|
"loss": 1.4306, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.731371618414808e-06, |
|
"loss": 0.9422, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.721879449454201e-06, |
|
"loss": 0.6539, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.712387280493593e-06, |
|
"loss": 0.862, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.702895111532987e-06, |
|
"loss": 1.2179, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.693402942572378e-06, |
|
"loss": 0.9396, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.683910773611772e-06, |
|
"loss": 0.897, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.674418604651164e-06, |
|
"loss": 0.8183, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.664926435690555e-06, |
|
"loss": 0.5799, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.655434266729949e-06, |
|
"loss": 0.6694, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.64594209776934e-06, |
|
"loss": 0.6407, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.636449928808734e-06, |
|
"loss": 0.4138, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.626957759848126e-06, |
|
"loss": 1.255, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.5974699854850769, |
|
"eval_runtime": 216.3078, |
|
"eval_samples_per_second": 9.343, |
|
"eval_steps_per_second": 2.339, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.617465590887517e-06, |
|
"loss": 1.0736, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.607973421926911e-06, |
|
"loss": 0.7988, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.598481252966304e-06, |
|
"loss": 0.7407, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.588989084005696e-06, |
|
"loss": 0.6603, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.57949691504509e-06, |
|
"loss": 0.5824, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.570004746084481e-06, |
|
"loss": 0.8746, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.560512577123873e-06, |
|
"loss": 0.3868, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.551020408163266e-06, |
|
"loss": 0.5346, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.541528239202658e-06, |
|
"loss": 0.559, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.532036070242052e-06, |
|
"loss": 0.8601, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.522543901281443e-06, |
|
"loss": 0.5392, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.513051732320835e-06, |
|
"loss": 0.6593, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.503559563360229e-06, |
|
"loss": 0.8227, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.49406739439962e-06, |
|
"loss": 0.6741, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.484575225439014e-06, |
|
"loss": 0.3784, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.475083056478406e-06, |
|
"loss": 0.4041, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.465590887517799e-06, |
|
"loss": 0.8986, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.45609871855719e-06, |
|
"loss": 0.6402, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.446606549596583e-06, |
|
"loss": 0.5837, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.437114380635976e-06, |
|
"loss": 0.7486, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.42762221167537e-06, |
|
"loss": 0.7009, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.418130042714761e-06, |
|
"loss": 0.4641, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.408637873754155e-06, |
|
"loss": 0.0882, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.399145704793546e-06, |
|
"loss": 0.4681, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.389653535832938e-06, |
|
"loss": 0.5282, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.380161366872332e-06, |
|
"loss": 0.7416, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.370669197911723e-06, |
|
"loss": 0.6423, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.361177028951117e-06, |
|
"loss": 0.6967, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.351684859990508e-06, |
|
"loss": 0.4016, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.3421926910299e-06, |
|
"loss": 0.698, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.332700522069294e-06, |
|
"loss": 0.4438, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.323208353108685e-06, |
|
"loss": 0.5083, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.313716184148079e-06, |
|
"loss": 0.8066, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.30422401518747e-06, |
|
"loss": 0.5104, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.294731846226862e-06, |
|
"loss": 0.383, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.285239677266256e-06, |
|
"loss": 0.4484, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.275747508305648e-06, |
|
"loss": 0.5572, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.266255339345041e-06, |
|
"loss": 0.6063, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.256763170384434e-06, |
|
"loss": 0.6665, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.247271001423826e-06, |
|
"loss": 0.6123, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.23777883246322e-06, |
|
"loss": 0.5005, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.228286663502611e-06, |
|
"loss": 0.9236, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.218794494542003e-06, |
|
"loss": 0.4072, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.209302325581397e-06, |
|
"loss": 0.6445, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.199810156620788e-06, |
|
"loss": 0.9185, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.190317987660182e-06, |
|
"loss": 0.261, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.180825818699574e-06, |
|
"loss": 0.7811, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.171333649738965e-06, |
|
"loss": 0.7295, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.161841480778359e-06, |
|
"loss": 0.3809, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.15234931181775e-06, |
|
"loss": 0.6035, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.48104238510131836, |
|
"eval_runtime": 210.5024, |
|
"eval_samples_per_second": 9.601, |
|
"eval_steps_per_second": 2.404, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.142857142857144e-06, |
|
"loss": 0.9329, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.133364973896536e-06, |
|
"loss": 0.65, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.123872804935927e-06, |
|
"loss": 0.3792, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.11438063597532e-06, |
|
"loss": 0.1691, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.104888467014713e-06, |
|
"loss": 0.6827, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.095396298054106e-06, |
|
"loss": 0.6459, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.0859041290935e-06, |
|
"loss": 0.8112, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.076411960132891e-06, |
|
"loss": 0.6337, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.066919791172285e-06, |
|
"loss": 0.4256, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.057427622211676e-06, |
|
"loss": 0.2803, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.047935453251068e-06, |
|
"loss": 0.5562, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.038443284290462e-06, |
|
"loss": 0.3124, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.028951115329853e-06, |
|
"loss": 0.3223, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.019458946369247e-06, |
|
"loss": 0.5686, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.009966777408639e-06, |
|
"loss": 0.2857, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.00047460844803e-06, |
|
"loss": 0.4192, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.990982439487424e-06, |
|
"loss": 0.2262, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 8.981490270526815e-06, |
|
"loss": 0.6804, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.971998101566209e-06, |
|
"loss": 0.3721, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.9625059326056e-06, |
|
"loss": 0.47, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.953013763644992e-06, |
|
"loss": 0.7316, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 8.943521594684386e-06, |
|
"loss": 0.7199, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.934029425723778e-06, |
|
"loss": 0.6315, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.924537256763171e-06, |
|
"loss": 0.4095, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.915045087802565e-06, |
|
"loss": 1.0135, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.905552918841956e-06, |
|
"loss": 0.6973, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.89606074988135e-06, |
|
"loss": 0.3849, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.886568580920741e-06, |
|
"loss": 0.4011, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 8.877076411960133e-06, |
|
"loss": 0.2936, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.867584242999527e-06, |
|
"loss": 0.5101, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.858092074038918e-06, |
|
"loss": 0.4414, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.848599905078312e-06, |
|
"loss": 0.5243, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 8.839107736117704e-06, |
|
"loss": 0.3593, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.829615567157095e-06, |
|
"loss": 1.0084, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.820123398196489e-06, |
|
"loss": 0.612, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 8.81063122923588e-06, |
|
"loss": 0.6974, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.801139060275274e-06, |
|
"loss": 0.0845, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.791646891314666e-06, |
|
"loss": 0.4725, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.782154722354057e-06, |
|
"loss": 0.5528, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 8.772662553393451e-06, |
|
"loss": 0.4501, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.763170384432843e-06, |
|
"loss": 0.5435, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.753678215472236e-06, |
|
"loss": 0.3251, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.74418604651163e-06, |
|
"loss": 0.5594, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.734693877551021e-06, |
|
"loss": 0.7562, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.725201708590415e-06, |
|
"loss": 0.463, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.715709539629807e-06, |
|
"loss": 0.5296, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 8.706217370669198e-06, |
|
"loss": 0.6942, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.696725201708592e-06, |
|
"loss": 0.6386, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.687233032747983e-06, |
|
"loss": 0.4996, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.677740863787377e-06, |
|
"loss": 0.2947, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 0.43847087025642395, |
|
"eval_runtime": 211.0507, |
|
"eval_samples_per_second": 9.576, |
|
"eval_steps_per_second": 2.398, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.668248694826769e-06, |
|
"loss": 0.5069, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.65875652586616e-06, |
|
"loss": 0.5187, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.649264356905554e-06, |
|
"loss": 0.7606, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.639772187944946e-06, |
|
"loss": 0.3025, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.630280018984339e-06, |
|
"loss": 0.5177, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.62078785002373e-06, |
|
"loss": 0.7777, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 8.611295681063123e-06, |
|
"loss": 0.4197, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.601803512102516e-06, |
|
"loss": 0.2935, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.59231134314191e-06, |
|
"loss": 0.3054, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.582819174181301e-06, |
|
"loss": 0.3883, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 8.573327005220695e-06, |
|
"loss": 0.5324, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.563834836260086e-06, |
|
"loss": 0.5354, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.554342667299478e-06, |
|
"loss": 0.6134, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 8.544850498338872e-06, |
|
"loss": 0.289, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.535358329378263e-06, |
|
"loss": 0.2473, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.525866160417657e-06, |
|
"loss": 0.3148, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.516373991457048e-06, |
|
"loss": 0.5027, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 8.506881822496442e-06, |
|
"loss": 0.24, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.497389653535834e-06, |
|
"loss": 0.5346, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.487897484575225e-06, |
|
"loss": 0.5567, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.478405315614619e-06, |
|
"loss": 0.3816, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 8.46891314665401e-06, |
|
"loss": 0.499, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.459420977693404e-06, |
|
"loss": 0.6085, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.449928808732796e-06, |
|
"loss": 0.5301, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 8.440436639772188e-06, |
|
"loss": 0.5552, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.430944470811581e-06, |
|
"loss": 0.3411, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.421452301850974e-06, |
|
"loss": 0.9363, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.411960132890366e-06, |
|
"loss": 0.6064, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.40246796392976e-06, |
|
"loss": 0.5264, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.392975794969151e-06, |
|
"loss": 0.4054, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.383483626008543e-06, |
|
"loss": 0.4287, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.373991457047937e-06, |
|
"loss": 0.1082, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.364499288087328e-06, |
|
"loss": 0.6657, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.355007119126722e-06, |
|
"loss": 0.5962, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.345514950166114e-06, |
|
"loss": 0.4596, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 8.336022781205505e-06, |
|
"loss": 0.3728, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.326530612244899e-06, |
|
"loss": 0.5586, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.31703844328429e-06, |
|
"loss": 0.4651, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.307546274323684e-06, |
|
"loss": 0.5714, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.298054105363076e-06, |
|
"loss": 0.525, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.288561936402469e-06, |
|
"loss": 0.3305, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.279069767441861e-06, |
|
"loss": 0.5429, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 8.269577598481253e-06, |
|
"loss": 0.5503, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.260085429520646e-06, |
|
"loss": 0.303, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.25059326056004e-06, |
|
"loss": 0.4068, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.241101091599431e-06, |
|
"loss": 0.1963, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.231608922638825e-06, |
|
"loss": 0.2071, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.222116753678216e-06, |
|
"loss": 0.3768, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.212624584717608e-06, |
|
"loss": 0.3167, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.203132415757002e-06, |
|
"loss": 0.5134, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.4093641936779022, |
|
"eval_runtime": 211.8311, |
|
"eval_samples_per_second": 9.541, |
|
"eval_steps_per_second": 2.389, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.193640246796393e-06, |
|
"loss": 0.5266, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.184148077835787e-06, |
|
"loss": 0.3986, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.174655908875179e-06, |
|
"loss": 0.4266, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.16516373991457e-06, |
|
"loss": 0.3662, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.155671570953964e-06, |
|
"loss": 0.6357, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.146179401993356e-06, |
|
"loss": 0.6372, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.136687233032749e-06, |
|
"loss": 0.5443, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.12719506407214e-06, |
|
"loss": 0.2663, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.117702895111534e-06, |
|
"loss": 0.5385, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.108210726150926e-06, |
|
"loss": 0.3451, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.098718557190318e-06, |
|
"loss": 0.5365, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.089226388229711e-06, |
|
"loss": 0.4255, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.079734219269105e-06, |
|
"loss": 0.4673, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.070242050308496e-06, |
|
"loss": 0.7596, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.06074988134789e-06, |
|
"loss": 0.2713, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.051257712387281e-06, |
|
"loss": 0.3279, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.041765543426673e-06, |
|
"loss": 0.7999, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.032273374466067e-06, |
|
"loss": 0.4163, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 8.022781205505458e-06, |
|
"loss": 0.2294, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 8.013289036544852e-06, |
|
"loss": 0.6922, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 8.003796867584244e-06, |
|
"loss": 0.656, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.994304698623635e-06, |
|
"loss": 0.407, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.984812529663029e-06, |
|
"loss": 0.4617, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.97532036070242e-06, |
|
"loss": 0.4542, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.965828191741814e-06, |
|
"loss": 0.5353, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.956336022781206e-06, |
|
"loss": 0.3015, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.946843853820598e-06, |
|
"loss": 0.5876, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.937351684859991e-06, |
|
"loss": 0.44, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.927859515899383e-06, |
|
"loss": 0.3684, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.918367346938776e-06, |
|
"loss": 0.3488, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.90887517797817e-06, |
|
"loss": 0.4936, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.899383009017561e-06, |
|
"loss": 0.4513, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.889890840056955e-06, |
|
"loss": 0.4719, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.880398671096347e-06, |
|
"loss": 0.4911, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.870906502135738e-06, |
|
"loss": 0.5195, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.861414333175132e-06, |
|
"loss": 0.2038, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.851922164214523e-06, |
|
"loss": 0.3553, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.842429995253917e-06, |
|
"loss": 0.5719, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.832937826293309e-06, |
|
"loss": 0.7717, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.8234456573327e-06, |
|
"loss": 0.1035, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.813953488372094e-06, |
|
"loss": 0.5225, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.804461319411486e-06, |
|
"loss": 0.4826, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.794969150450879e-06, |
|
"loss": 0.573, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.78547698149027e-06, |
|
"loss": 0.2182, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.775984812529663e-06, |
|
"loss": 0.6481, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.766492643569056e-06, |
|
"loss": 0.3904, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.757000474608448e-06, |
|
"loss": 0.7893, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.747508305647841e-06, |
|
"loss": 0.4534, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.738016136687235e-06, |
|
"loss": 0.6228, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 7.728523967726626e-06, |
|
"loss": 0.345, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 0.3839055895805359, |
|
"eval_runtime": 210.4029, |
|
"eval_samples_per_second": 9.605, |
|
"eval_steps_per_second": 2.405, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.71903179876602e-06, |
|
"loss": 0.5411, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.709539629805412e-06, |
|
"loss": 0.5758, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 7.700047460844803e-06, |
|
"loss": 0.3009, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.690555291884197e-06, |
|
"loss": 0.5573, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.681063122923589e-06, |
|
"loss": 0.627, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.671570953962982e-06, |
|
"loss": 0.5643, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.662078785002374e-06, |
|
"loss": 0.6044, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.652586616041765e-06, |
|
"loss": 0.3706, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.643094447081159e-06, |
|
"loss": 0.4161, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.63360227812055e-06, |
|
"loss": 0.4195, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.624110109159943e-06, |
|
"loss": 0.619, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.614617940199336e-06, |
|
"loss": 0.5089, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.6051257712387284e-06, |
|
"loss": 0.288, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.595633602278121e-06, |
|
"loss": 0.3765, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.586141433317513e-06, |
|
"loss": 0.6771, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.576649264356905e-06, |
|
"loss": 0.3748, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.567157095396299e-06, |
|
"loss": 0.5745, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.557664926435691e-06, |
|
"loss": 0.6296, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.548172757475084e-06, |
|
"loss": 0.3589, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.538680588514477e-06, |
|
"loss": 0.6862, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.529188419553869e-06, |
|
"loss": 0.5385, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.519696250593261e-06, |
|
"loss": 0.2568, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.5102040816326536e-06, |
|
"loss": 0.3519, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.500711912672046e-06, |
|
"loss": 0.3583, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.491219743711439e-06, |
|
"loss": 0.4923, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.481727574750831e-06, |
|
"loss": 0.3252, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.472235405790224e-06, |
|
"loss": 0.4134, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.462743236829616e-06, |
|
"loss": 0.4374, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.453251067869008e-06, |
|
"loss": 0.2847, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.443758898908401e-06, |
|
"loss": 0.3431, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 7.4342667299477935e-06, |
|
"loss": 0.3763, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.424774560987186e-06, |
|
"loss": 0.4108, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.415282392026578e-06, |
|
"loss": 0.2789, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.405790223065972e-06, |
|
"loss": 0.2425, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.396298054105364e-06, |
|
"loss": 0.3791, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.3868058851447565e-06, |
|
"loss": 0.5064, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.377313716184149e-06, |
|
"loss": 0.3999, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.367821547223542e-06, |
|
"loss": 0.4088, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 7.358329378262934e-06, |
|
"loss": 0.3158, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.348837209302326e-06, |
|
"loss": 0.3839, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.339345040341719e-06, |
|
"loss": 0.7998, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 7.329852871381111e-06, |
|
"loss": 0.4684, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.320360702420504e-06, |
|
"loss": 0.3617, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.310868533459896e-06, |
|
"loss": 0.3617, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.301376364499288e-06, |
|
"loss": 0.4053, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.291884195538681e-06, |
|
"loss": 0.4896, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.282392026578073e-06, |
|
"loss": 0.385, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.272899857617466e-06, |
|
"loss": 0.376, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.2634076886568585e-06, |
|
"loss": 0.49, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.253915519696251e-06, |
|
"loss": 0.3913, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 0.37262919545173645, |
|
"eval_runtime": 210.8205, |
|
"eval_samples_per_second": 9.586, |
|
"eval_steps_per_second": 2.4, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.244423350735643e-06, |
|
"loss": 0.5203, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.234931181775036e-06, |
|
"loss": 0.2866, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.225439012814429e-06, |
|
"loss": 0.322, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.2159468438538215e-06, |
|
"loss": 0.4069, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.206454674893214e-06, |
|
"loss": 0.4716, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.196962505932607e-06, |
|
"loss": 0.6109, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.187470336971999e-06, |
|
"loss": 0.5786, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.177978168011391e-06, |
|
"loss": 0.5905, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.168485999050784e-06, |
|
"loss": 0.425, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.158993830090176e-06, |
|
"loss": 0.3144, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.149501661129569e-06, |
|
"loss": 0.4081, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.1400094921689614e-06, |
|
"loss": 0.3787, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.130517323208353e-06, |
|
"loss": 0.2301, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.121025154247746e-06, |
|
"loss": 0.318, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.111532985287138e-06, |
|
"loss": 0.4586, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.102040816326531e-06, |
|
"loss": 0.4389, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 7.0925486473659236e-06, |
|
"loss": 0.473, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.083056478405316e-06, |
|
"loss": 0.1068, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.073564309444708e-06, |
|
"loss": 0.243, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.064072140484101e-06, |
|
"loss": 0.3434, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.054579971523494e-06, |
|
"loss": 0.5638, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.0450878025628865e-06, |
|
"loss": 0.448, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.035595633602279e-06, |
|
"loss": 0.6347, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.026103464641672e-06, |
|
"loss": 0.7818, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 7.016611295681064e-06, |
|
"loss": 0.4482, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 7.007119126720456e-06, |
|
"loss": 0.6219, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.997626957759849e-06, |
|
"loss": 0.2313, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.988134788799241e-06, |
|
"loss": 0.1876, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.978642619838634e-06, |
|
"loss": 0.4771, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.9691504508780265e-06, |
|
"loss": 0.4634, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.959658281917418e-06, |
|
"loss": 0.489, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.950166112956811e-06, |
|
"loss": 0.3812, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.940673943996203e-06, |
|
"loss": 0.4829, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.931181775035596e-06, |
|
"loss": 0.495, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.921689606074989e-06, |
|
"loss": 0.4593, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.91219743711438e-06, |
|
"loss": 0.3818, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.902705268153773e-06, |
|
"loss": 0.3838, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.893213099193166e-06, |
|
"loss": 0.2638, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.883720930232559e-06, |
|
"loss": 0.3994, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.874228761271952e-06, |
|
"loss": 0.2879, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.864736592311344e-06, |
|
"loss": 0.5988, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.855244423350737e-06, |
|
"loss": 0.4441, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.8457522543901285e-06, |
|
"loss": 0.4185, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.836260085429521e-06, |
|
"loss": 0.4659, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.826767916468914e-06, |
|
"loss": 0.4107, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.817275747508306e-06, |
|
"loss": 0.2984, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.807783578547699e-06, |
|
"loss": 0.5467, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.7982914095870915e-06, |
|
"loss": 0.5183, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.788799240626483e-06, |
|
"loss": 0.1466, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.779307071665876e-06, |
|
"loss": 0.2805, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.36545732617378235, |
|
"eval_runtime": 211.5781, |
|
"eval_samples_per_second": 9.552, |
|
"eval_steps_per_second": 2.392, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.7698149027052685e-06, |
|
"loss": 0.7174, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.760322733744661e-06, |
|
"loss": 0.6719, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.750830564784054e-06, |
|
"loss": 0.4309, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.741338395823445e-06, |
|
"loss": 0.2661, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.731846226862838e-06, |
|
"loss": 0.5369, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.7223540579022314e-06, |
|
"loss": 0.3933, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.712861888941624e-06, |
|
"loss": 0.456, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.703369719981017e-06, |
|
"loss": 0.7618, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.693877551020409e-06, |
|
"loss": 0.5126, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.684385382059802e-06, |
|
"loss": 0.2889, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.6748932130991936e-06, |
|
"loss": 0.6109, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.665401044138586e-06, |
|
"loss": 0.3158, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 6.655908875177979e-06, |
|
"loss": 0.4721, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.646416706217371e-06, |
|
"loss": 0.4659, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.636924537256764e-06, |
|
"loss": 0.4783, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.627432368296156e-06, |
|
"loss": 0.3333, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.617940199335548e-06, |
|
"loss": 0.3447, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.608448030374941e-06, |
|
"loss": 0.5593, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.5989558614143335e-06, |
|
"loss": 0.3474, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 6.589463692453726e-06, |
|
"loss": 0.5889, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.579971523493119e-06, |
|
"loss": 0.1262, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.5704793545325104e-06, |
|
"loss": 0.5701, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.560987185571903e-06, |
|
"loss": 0.6356, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 6.5514950166112965e-06, |
|
"loss": 0.5827, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.542002847650689e-06, |
|
"loss": 0.4105, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.532510678690082e-06, |
|
"loss": 0.4191, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.523018509729474e-06, |
|
"loss": 0.3027, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.513526340768867e-06, |
|
"loss": 0.395, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.504034171808259e-06, |
|
"loss": 0.2587, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.494542002847651e-06, |
|
"loss": 0.2284, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 6.485049833887044e-06, |
|
"loss": 0.4834, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.475557664926436e-06, |
|
"loss": 0.1843, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.466065495965829e-06, |
|
"loss": 0.5196, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 6.456573327005221e-06, |
|
"loss": 0.4602, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.447081158044613e-06, |
|
"loss": 0.1406, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.437588989084006e-06, |
|
"loss": 0.3493, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.4280968201233985e-06, |
|
"loss": 0.4312, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.418604651162791e-06, |
|
"loss": 0.5843, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.409112482202183e-06, |
|
"loss": 0.4368, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.3996203132415755e-06, |
|
"loss": 0.4638, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 6.390128144280968e-06, |
|
"loss": 0.5721, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.3806359753203615e-06, |
|
"loss": 0.2774, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.371143806359754e-06, |
|
"loss": 0.641, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.361651637399147e-06, |
|
"loss": 0.3003, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 6.352159468438539e-06, |
|
"loss": 0.5912, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.342667299477931e-06, |
|
"loss": 0.5673, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.333175130517324e-06, |
|
"loss": 0.3721, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.323682961556716e-06, |
|
"loss": 0.5748, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.314190792596109e-06, |
|
"loss": 0.384, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.3046986236355014e-06, |
|
"loss": 0.6733, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 0.3598354458808899, |
|
"eval_runtime": 209.9575, |
|
"eval_samples_per_second": 9.626, |
|
"eval_steps_per_second": 2.41, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.295206454674894e-06, |
|
"loss": 0.29, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 6.285714285714286e-06, |
|
"loss": 0.2849, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.276222116753678e-06, |
|
"loss": 0.5834, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.266729947793071e-06, |
|
"loss": 0.1479, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.257237778832464e-06, |
|
"loss": 0.5803, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 6.247745609871856e-06, |
|
"loss": 0.2966, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.238253440911248e-06, |
|
"loss": 0.3872, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.2287612719506405e-06, |
|
"loss": 0.4973, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.219269102990034e-06, |
|
"loss": 0.6415, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.2097769340294266e-06, |
|
"loss": 0.4729, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.200284765068819e-06, |
|
"loss": 0.5401, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.190792596108212e-06, |
|
"loss": 0.2515, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 6.181300427147604e-06, |
|
"loss": 0.3167, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.171808258186996e-06, |
|
"loss": 0.6597, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.162316089226389e-06, |
|
"loss": 0.4192, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 6.152823920265781e-06, |
|
"loss": 0.2399, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.143331751305174e-06, |
|
"loss": 0.3981, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.1338395823445665e-06, |
|
"loss": 0.3968, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.124347413383958e-06, |
|
"loss": 0.3704, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.114855244423351e-06, |
|
"loss": 0.4162, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.1053630754627434e-06, |
|
"loss": 0.3179, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.095870906502136e-06, |
|
"loss": 0.4292, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 6.086378737541529e-06, |
|
"loss": 0.4461, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.076886568580921e-06, |
|
"loss": 0.4048, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.067394399620313e-06, |
|
"loss": 0.4935, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.0579022306597056e-06, |
|
"loss": 0.2367, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.048410061699099e-06, |
|
"loss": 0.354, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 6.038917892738492e-06, |
|
"loss": 0.705, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 6.029425723777884e-06, |
|
"loss": 0.5404, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 6.019933554817277e-06, |
|
"loss": 0.4148, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.010441385856669e-06, |
|
"loss": 0.2862, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.000949216896061e-06, |
|
"loss": 0.5574, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.991457047935454e-06, |
|
"loss": 0.3857, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.981964878974846e-06, |
|
"loss": 0.4672, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.972472710014239e-06, |
|
"loss": 0.5925, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.9629805410536315e-06, |
|
"loss": 0.4423, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.953488372093023e-06, |
|
"loss": 0.4828, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.943996203132416e-06, |
|
"loss": 0.5208, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.9345040341718085e-06, |
|
"loss": 0.3895, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.925011865211201e-06, |
|
"loss": 0.4667, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.915519696250594e-06, |
|
"loss": 0.5809, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.906027527289986e-06, |
|
"loss": 0.4157, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.896535358329378e-06, |
|
"loss": 0.2807, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.887043189368771e-06, |
|
"loss": 0.5036, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.877551020408164e-06, |
|
"loss": 0.3498, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.868058851447557e-06, |
|
"loss": 0.46, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.858566682486949e-06, |
|
"loss": 0.2416, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.849074513526342e-06, |
|
"loss": 0.3104, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.8395823445657344e-06, |
|
"loss": 0.6617, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.830090175605126e-06, |
|
"loss": 0.2603, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 0.3565267324447632, |
|
"eval_runtime": 210.4541, |
|
"eval_samples_per_second": 9.603, |
|
"eval_steps_per_second": 2.404, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.820598006644519e-06, |
|
"loss": 0.1701, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.811105837683911e-06, |
|
"loss": 0.3745, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.801613668723304e-06, |
|
"loss": 0.296, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.7921214997626966e-06, |
|
"loss": 0.1971, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.782629330802088e-06, |
|
"loss": 0.3824, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.773137161841481e-06, |
|
"loss": 0.4496, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.7636449928808735e-06, |
|
"loss": 0.3678, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.754152823920266e-06, |
|
"loss": 0.368, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.744660654959659e-06, |
|
"loss": 0.4836, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.7351684859990505e-06, |
|
"loss": 0.4723, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.725676317038443e-06, |
|
"loss": 0.2437, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.716184148077836e-06, |
|
"loss": 0.1526, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.706691979117229e-06, |
|
"loss": 0.1451, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.697199810156622e-06, |
|
"loss": 0.4266, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.687707641196014e-06, |
|
"loss": 0.5522, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.678215472235407e-06, |
|
"loss": 0.4187, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.668723303274799e-06, |
|
"loss": 0.3749, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.659231134314191e-06, |
|
"loss": 0.156, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.649738965353584e-06, |
|
"loss": 0.1152, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.6402467963929764e-06, |
|
"loss": 0.4565, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.630754627432369e-06, |
|
"loss": 0.328, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.621262458471762e-06, |
|
"loss": 0.4919, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.611770289511153e-06, |
|
"loss": 0.3646, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.602278120550546e-06, |
|
"loss": 0.3677, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.5927859515899386e-06, |
|
"loss": 0.2724, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.583293782629331e-06, |
|
"loss": 0.3406, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.573801613668724e-06, |
|
"loss": 0.4709, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.5643094447081155e-06, |
|
"loss": 0.3161, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.554817275747508e-06, |
|
"loss": 0.4555, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.545325106786901e-06, |
|
"loss": 0.4641, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.535832937826294e-06, |
|
"loss": 0.4858, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.526340768865687e-06, |
|
"loss": 0.4066, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.516848599905079e-06, |
|
"loss": 0.4246, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.507356430944472e-06, |
|
"loss": 0.3599, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.497864261983864e-06, |
|
"loss": 0.4388, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.488372093023256e-06, |
|
"loss": 0.2378, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5.478879924062649e-06, |
|
"loss": 0.512, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.4693877551020415e-06, |
|
"loss": 0.5356, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.459895586141434e-06, |
|
"loss": 0.723, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.450403417180826e-06, |
|
"loss": 0.4442, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5.440911248220218e-06, |
|
"loss": 0.5757, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.431419079259611e-06, |
|
"loss": 0.3083, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.421926910299004e-06, |
|
"loss": 0.3306, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 5.412434741338396e-06, |
|
"loss": 0.3638, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.402942572377789e-06, |
|
"loss": 0.3285, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.3934504034171805e-06, |
|
"loss": 0.5824, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.383958234456573e-06, |
|
"loss": 0.4002, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 5.374466065495966e-06, |
|
"loss": 0.1342, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.364973896535359e-06, |
|
"loss": 0.3491, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.355481727574752e-06, |
|
"loss": 0.5927, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 0.3536190092563629, |
|
"eval_runtime": 209.407, |
|
"eval_samples_per_second": 9.651, |
|
"eval_steps_per_second": 2.416, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5.345989558614144e-06, |
|
"loss": 0.1028, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.336497389653537e-06, |
|
"loss": 0.206, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.327005220692929e-06, |
|
"loss": 0.2538, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.317513051732321e-06, |
|
"loss": 0.4764, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 5.308020882771714e-06, |
|
"loss": 0.4456, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.2985287138111065e-06, |
|
"loss": 0.2988, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.289036544850499e-06, |
|
"loss": 0.4721, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 5.279544375889891e-06, |
|
"loss": 0.4259, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.2700522069292835e-06, |
|
"loss": 0.1379, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.260560037968676e-06, |
|
"loss": 0.4755, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.251067869008069e-06, |
|
"loss": 0.2663, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.241575700047461e-06, |
|
"loss": 0.3229, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.232083531086853e-06, |
|
"loss": 0.2666, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.222591362126246e-06, |
|
"loss": 0.3572, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.213099193165638e-06, |
|
"loss": 0.4938, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 5.203607024205031e-06, |
|
"loss": 0.5069, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.194114855244424e-06, |
|
"loss": 0.6373, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.184622686283817e-06, |
|
"loss": 0.4395, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.175130517323209e-06, |
|
"loss": 0.3435, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.165638348362601e-06, |
|
"loss": 0.2505, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.156146179401994e-06, |
|
"loss": 0.4521, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.146654010441386e-06, |
|
"loss": 0.4649, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 5.137161841480779e-06, |
|
"loss": 0.429, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.1276696725201716e-06, |
|
"loss": 0.4792, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.118177503559564e-06, |
|
"loss": 0.4247, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5.108685334598956e-06, |
|
"loss": 0.5332, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.0991931656383485e-06, |
|
"loss": 0.1256, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.089700996677741e-06, |
|
"loss": 0.6229, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.080208827717134e-06, |
|
"loss": 0.5491, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.070716658756526e-06, |
|
"loss": 0.5865, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.061224489795918e-06, |
|
"loss": 0.3055, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.051732320835311e-06, |
|
"loss": 0.2392, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5.042240151874703e-06, |
|
"loss": 0.2454, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5.032747982914097e-06, |
|
"loss": 0.3592, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5.023255813953489e-06, |
|
"loss": 0.388, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5.013763644992882e-06, |
|
"loss": 0.1818, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 5.0042714760322745e-06, |
|
"loss": 0.3422, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.994779307071666e-06, |
|
"loss": 0.4801, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.985287138111059e-06, |
|
"loss": 0.7783, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.975794969150451e-06, |
|
"loss": 0.1979, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.966302800189844e-06, |
|
"loss": 0.1784, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.956810631229237e-06, |
|
"loss": 0.3514, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.947318462268629e-06, |
|
"loss": 0.4134, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.937826293308021e-06, |
|
"loss": 0.3057, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.9283341243474135e-06, |
|
"loss": 0.5433, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.918841955386806e-06, |
|
"loss": 0.6126, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.909349786426199e-06, |
|
"loss": 0.2329, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.899857617465591e-06, |
|
"loss": 0.3048, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.890365448504984e-06, |
|
"loss": 0.6663, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.8808732795443765e-06, |
|
"loss": 0.3444, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.3520536720752716, |
|
"eval_runtime": 210.3626, |
|
"eval_samples_per_second": 9.607, |
|
"eval_steps_per_second": 2.405, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.871381110583769e-06, |
|
"loss": 0.7078, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.861888941623161e-06, |
|
"loss": 0.3893, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.8523967726625535e-06, |
|
"loss": 0.3299, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.842904603701946e-06, |
|
"loss": 0.2718, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.833412434741339e-06, |
|
"loss": 0.4207, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.823920265780731e-06, |
|
"loss": 0.2569, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.814428096820124e-06, |
|
"loss": 0.3158, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.8049359278595164e-06, |
|
"loss": 0.4117, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.795443758898909e-06, |
|
"loss": 0.4646, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.785951589938302e-06, |
|
"loss": 0.3877, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.776459420977693e-06, |
|
"loss": 0.2987, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.766967252017086e-06, |
|
"loss": 0.3859, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.757475083056479e-06, |
|
"loss": 0.138, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.747982914095871e-06, |
|
"loss": 0.2836, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.738490745135264e-06, |
|
"loss": 0.4993, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.728998576174656e-06, |
|
"loss": 0.4256, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.719506407214049e-06, |
|
"loss": 0.5983, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.7100142382534416e-06, |
|
"loss": 0.4265, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.700522069292834e-06, |
|
"loss": 0.2854, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.691029900332226e-06, |
|
"loss": 0.3984, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.6815377313716185e-06, |
|
"loss": 0.34, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.672045562411011e-06, |
|
"loss": 0.4355, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.662553393450404e-06, |
|
"loss": 0.4004, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.653061224489796e-06, |
|
"loss": 0.3462, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.643569055529189e-06, |
|
"loss": 0.3231, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.6340768865685815e-06, |
|
"loss": 0.6457, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.624584717607974e-06, |
|
"loss": 0.4086, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.615092548647367e-06, |
|
"loss": 0.2528, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.6056003796867584e-06, |
|
"loss": 0.5488, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.596108210726151e-06, |
|
"loss": 0.381, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.586616041765544e-06, |
|
"loss": 0.5675, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.577123872804936e-06, |
|
"loss": 0.5866, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.567631703844329e-06, |
|
"loss": 0.4035, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.558139534883721e-06, |
|
"loss": 0.2322, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.548647365923114e-06, |
|
"loss": 0.717, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.539155196962507e-06, |
|
"loss": 0.5347, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.529663028001899e-06, |
|
"loss": 0.2423, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.520170859041291e-06, |
|
"loss": 0.439, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.5106786900806835e-06, |
|
"loss": 0.4595, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 4.501186521120076e-06, |
|
"loss": 0.5906, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.491694352159469e-06, |
|
"loss": 0.5294, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.482202183198861e-06, |
|
"loss": 0.2951, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.472710014238254e-06, |
|
"loss": 0.6254, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.4632178452776465e-06, |
|
"loss": 0.5945, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.453725676317039e-06, |
|
"loss": 0.4814, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.444233507356432e-06, |
|
"loss": 0.4048, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.4347413383958235e-06, |
|
"loss": 0.1721, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.425249169435216e-06, |
|
"loss": 0.2362, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.415757000474609e-06, |
|
"loss": 0.2302, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.406264831514001e-06, |
|
"loss": 0.4041, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.34984728693962097, |
|
"eval_runtime": 212.0644, |
|
"eval_samples_per_second": 9.53, |
|
"eval_steps_per_second": 2.386, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.396772662553394e-06, |
|
"loss": 0.8092, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.3872804935927865e-06, |
|
"loss": 0.327, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.377788324632179e-06, |
|
"loss": 0.3231, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.368296155671572e-06, |
|
"loss": 0.4055, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.358803986710964e-06, |
|
"loss": 0.2245, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.349311817750356e-06, |
|
"loss": 0.4406, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.339819648789749e-06, |
|
"loss": 0.2078, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 4.330327479829141e-06, |
|
"loss": 0.5263, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.320835310868534e-06, |
|
"loss": 0.6044, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.311343141907926e-06, |
|
"loss": 0.3704, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.301850972947319e-06, |
|
"loss": 0.4697, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.2923588039867116e-06, |
|
"loss": 0.3077, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.282866635026104e-06, |
|
"loss": 0.4685, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.273374466065496e-06, |
|
"loss": 0.4972, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.2638822971048885e-06, |
|
"loss": 0.4597, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.254390128144281e-06, |
|
"loss": 0.2447, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.244897959183674e-06, |
|
"loss": 0.5126, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.235405790223066e-06, |
|
"loss": 0.3077, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.225913621262459e-06, |
|
"loss": 0.3864, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.2164214523018515e-06, |
|
"loss": 0.5483, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.206929283341244e-06, |
|
"loss": 0.5613, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.197437114380637e-06, |
|
"loss": 0.3505, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.1879449454200284e-06, |
|
"loss": 0.4028, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.178452776459421e-06, |
|
"loss": 0.3838, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.168960607498814e-06, |
|
"loss": 0.2545, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.159468438538206e-06, |
|
"loss": 0.3897, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.149976269577599e-06, |
|
"loss": 0.1108, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.1404841006169914e-06, |
|
"loss": 0.2364, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.130991931656384e-06, |
|
"loss": 0.4792, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.121499762695777e-06, |
|
"loss": 0.3424, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.112007593735169e-06, |
|
"loss": 0.3274, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.102515424774561e-06, |
|
"loss": 0.2145, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.0930232558139536e-06, |
|
"loss": 0.4916, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.083531086853346e-06, |
|
"loss": 0.4363, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.074038917892739e-06, |
|
"loss": 0.3422, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.064546748932131e-06, |
|
"loss": 0.4339, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.055054579971524e-06, |
|
"loss": 0.5902, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.0455624110109165e-06, |
|
"loss": 0.3237, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.036070242050309e-06, |
|
"loss": 0.4783, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.026578073089702e-06, |
|
"loss": 0.4535, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.0170859041290935e-06, |
|
"loss": 0.5848, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.007593735168486e-06, |
|
"loss": 0.501, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.998101566207879e-06, |
|
"loss": 0.5878, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.988609397247271e-06, |
|
"loss": 0.2525, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.979117228286664e-06, |
|
"loss": 0.1343, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.9696250593260565e-06, |
|
"loss": 0.4367, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.960132890365449e-06, |
|
"loss": 0.4518, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.950640721404842e-06, |
|
"loss": 0.1851, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.941148552444234e-06, |
|
"loss": 0.5702, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.931656383483626e-06, |
|
"loss": 0.5526, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 0.34791234135627747, |
|
"eval_runtime": 210.9344, |
|
"eval_samples_per_second": 9.581, |
|
"eval_steps_per_second": 2.399, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.922164214523019e-06, |
|
"loss": 0.2374, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.912672045562411e-06, |
|
"loss": 0.2343, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.903179876601804e-06, |
|
"loss": 0.3023, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.893687707641196e-06, |
|
"loss": 0.2294, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.884195538680589e-06, |
|
"loss": 0.2737, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.874703369719982e-06, |
|
"loss": 0.718, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.865211200759374e-06, |
|
"loss": 0.4965, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.855719031798767e-06, |
|
"loss": 0.5557, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.8462268628381585e-06, |
|
"loss": 0.3303, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.836734693877551e-06, |
|
"loss": 0.3357, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.827242524916944e-06, |
|
"loss": 0.1917, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.817750355956336e-06, |
|
"loss": 0.284, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.8082581869957285e-06, |
|
"loss": 0.4455, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.7987660180351215e-06, |
|
"loss": 0.2505, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.789273849074514e-06, |
|
"loss": 0.3804, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.7797816801139063e-06, |
|
"loss": 0.3043, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.770289511153299e-06, |
|
"loss": 0.1626, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.7607973421926915e-06, |
|
"loss": 0.2546, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.7513051732320836e-06, |
|
"loss": 0.4317, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.7418130042714762e-06, |
|
"loss": 0.3358, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.732320835310869e-06, |
|
"loss": 0.5867, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.722828666350261e-06, |
|
"loss": 0.6311, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.713336497389654e-06, |
|
"loss": 0.3264, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.7038443284290466e-06, |
|
"loss": 0.4179, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.694352159468439e-06, |
|
"loss": 0.372, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.6848599905078314e-06, |
|
"loss": 0.3102, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.675367821547224e-06, |
|
"loss": 0.494, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.665875652586616e-06, |
|
"loss": 0.2307, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.6563834836260088e-06, |
|
"loss": 0.4356, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.646891314665401e-06, |
|
"loss": 0.5974, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.6373991457047935e-06, |
|
"loss": 0.1553, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.6279069767441866e-06, |
|
"loss": 0.429, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.618414807783579e-06, |
|
"loss": 0.2103, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.6089226388229713e-06, |
|
"loss": 0.505, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.599430469862364e-06, |
|
"loss": 0.3072, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.5899383009017565e-06, |
|
"loss": 0.5361, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.5804461319411487e-06, |
|
"loss": 0.6273, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.5709539629805413e-06, |
|
"loss": 0.3035, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.5614617940199335e-06, |
|
"loss": 0.3123, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.551969625059326e-06, |
|
"loss": 0.4275, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.542477456098719e-06, |
|
"loss": 0.4825, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.5329852871381117e-06, |
|
"loss": 0.3058, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.523493118177504e-06, |
|
"loss": 0.4175, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.5140009492168964e-06, |
|
"loss": 0.3237, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.5045087802562886e-06, |
|
"loss": 0.5556, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.495016611295681e-06, |
|
"loss": 0.2021, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.485524442335074e-06, |
|
"loss": 0.4329, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.476032273374466e-06, |
|
"loss": 0.465, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.4665401044138586e-06, |
|
"loss": 0.4829, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.4570479354532516e-06, |
|
"loss": 0.3314, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 0.3463754951953888, |
|
"eval_runtime": 211.4262, |
|
"eval_samples_per_second": 9.559, |
|
"eval_steps_per_second": 2.393, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.447555766492644e-06, |
|
"loss": 0.3652, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.4380635975320364e-06, |
|
"loss": 0.3147, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.428571428571429e-06, |
|
"loss": 0.1345, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.419079259610821e-06, |
|
"loss": 0.361, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.4095870906502137e-06, |
|
"loss": 0.5192, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.4000949216896063e-06, |
|
"loss": 0.37, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.3906027527289985e-06, |
|
"loss": 0.4377, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.3811105837683915e-06, |
|
"loss": 0.5498, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.371618414807784e-06, |
|
"loss": 0.2211, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.3621262458471767e-06, |
|
"loss": 0.5511, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.352634076886569e-06, |
|
"loss": 0.4069, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.3431419079259615e-06, |
|
"loss": 0.51, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.3336497389653537e-06, |
|
"loss": 0.3028, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.3241575700047463e-06, |
|
"loss": 0.2983, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.314665401044139e-06, |
|
"loss": 0.7088, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.305173232083531e-06, |
|
"loss": 0.3242, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.295681063122924e-06, |
|
"loss": 0.3656, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.2861888941623166e-06, |
|
"loss": 0.5446, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.276696725201709e-06, |
|
"loss": 0.1479, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.2672045562411014e-06, |
|
"loss": 0.2724, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.257712387280494e-06, |
|
"loss": 0.4428, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.248220218319886e-06, |
|
"loss": 0.374, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.2387280493592788e-06, |
|
"loss": 0.4587, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.2292358803986714e-06, |
|
"loss": 0.2607, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.2197437114380635e-06, |
|
"loss": 0.31, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.2102515424774566e-06, |
|
"loss": 0.5392, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.200759373516849e-06, |
|
"loss": 0.3685, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.1912672045562413e-06, |
|
"loss": 0.6325, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.181775035595634e-06, |
|
"loss": 0.4223, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.1722828666350265e-06, |
|
"loss": 0.3727, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.1627906976744187e-06, |
|
"loss": 0.4869, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.1532985287138113e-06, |
|
"loss": 0.2286, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.143806359753204e-06, |
|
"loss": 0.4144, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.134314190792596e-06, |
|
"loss": 0.452, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.124822021831989e-06, |
|
"loss": 0.3295, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.1153298528713817e-06, |
|
"loss": 0.2194, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.105837683910774e-06, |
|
"loss": 0.2943, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.0963455149501664e-06, |
|
"loss": 0.4255, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.086853345989559e-06, |
|
"loss": 0.2137, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.0773611770289512e-06, |
|
"loss": 0.3923, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.067869008068344e-06, |
|
"loss": 0.3551, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.058376839107736e-06, |
|
"loss": 0.3381, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.0488846701471286e-06, |
|
"loss": 0.2882, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.0393925011865216e-06, |
|
"loss": 0.3432, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.029900332225914e-06, |
|
"loss": 0.5384, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.0204081632653064e-06, |
|
"loss": 0.3176, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.010915994304699e-06, |
|
"loss": 0.6058, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.0014238253440916e-06, |
|
"loss": 0.38, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.9919316563834837e-06, |
|
"loss": 0.3803, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.9824394874228763e-06, |
|
"loss": 0.4602, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 0.3481377065181732, |
|
"eval_runtime": 211.3751, |
|
"eval_samples_per_second": 9.561, |
|
"eval_steps_per_second": 2.394, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.9729473184622685e-06, |
|
"loss": 0.4522, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.963455149501661e-06, |
|
"loss": 0.227, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.953962980541054e-06, |
|
"loss": 0.4448, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.9444708115804467e-06, |
|
"loss": 0.538, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.934978642619839e-06, |
|
"loss": 0.3633, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.9254864736592315e-06, |
|
"loss": 0.2754, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.915994304698624e-06, |
|
"loss": 0.5962, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.9065021357380163e-06, |
|
"loss": 0.2812, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.897009966777409e-06, |
|
"loss": 0.5014, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.887517797816801e-06, |
|
"loss": 0.271, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.8780256288561936e-06, |
|
"loss": 0.2206, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.8685334598955866e-06, |
|
"loss": 0.3344, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.8590412909349792e-06, |
|
"loss": 0.4583, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.8495491219743714e-06, |
|
"loss": 0.4646, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.840056953013764e-06, |
|
"loss": 0.2786, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.830564784053156e-06, |
|
"loss": 0.2995, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.8210726150925488e-06, |
|
"loss": 0.2673, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.8115804461319414e-06, |
|
"loss": 0.3, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.8020882771713336e-06, |
|
"loss": 0.4938, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.792596108210726e-06, |
|
"loss": 0.5341, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.783103939250119e-06, |
|
"loss": 0.4789, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.7736117702895118e-06, |
|
"loss": 0.203, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.764119601328904e-06, |
|
"loss": 0.2085, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.7546274323682965e-06, |
|
"loss": 0.35, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.7451352634076887e-06, |
|
"loss": 0.5455, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.7356430944470813e-06, |
|
"loss": 0.6806, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.726150925486474e-06, |
|
"loss": 0.3908, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.716658756525866e-06, |
|
"loss": 0.3496, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.7071665875652587e-06, |
|
"loss": 0.3023, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.6976744186046517e-06, |
|
"loss": 0.2246, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.688182249644044e-06, |
|
"loss": 0.325, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.6786900806834365e-06, |
|
"loss": 0.344, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.669197911722829e-06, |
|
"loss": 0.496, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.6597057427622212e-06, |
|
"loss": 0.1879, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.650213573801614e-06, |
|
"loss": 0.3855, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.6407214048410064e-06, |
|
"loss": 0.5485, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.6312292358803986e-06, |
|
"loss": 0.2999, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.621737066919791e-06, |
|
"loss": 0.2614, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.6122448979591842e-06, |
|
"loss": 0.615, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.6027527289985764e-06, |
|
"loss": 0.4784, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.593260560037969e-06, |
|
"loss": 0.3825, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.5837683910773616e-06, |
|
"loss": 0.2182, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.5742762221167538e-06, |
|
"loss": 0.336, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.5647840531561463e-06, |
|
"loss": 0.3651, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.555291884195539e-06, |
|
"loss": 0.2512, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.545799715234931e-06, |
|
"loss": 0.4523, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.5363075462743237e-06, |
|
"loss": 0.3506, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.5268153773137167e-06, |
|
"loss": 0.3505, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.517323208353109e-06, |
|
"loss": 0.533, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.5078310393925015e-06, |
|
"loss": 0.4401, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 0.34744471311569214, |
|
"eval_runtime": 211.4984, |
|
"eval_samples_per_second": 9.556, |
|
"eval_steps_per_second": 2.392, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.498338870431894e-06, |
|
"loss": 0.3372, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.4888467014712863e-06, |
|
"loss": 0.3914, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.479354532510679e-06, |
|
"loss": 0.4832, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.4698623635500715e-06, |
|
"loss": 0.3643, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.460370194589464e-06, |
|
"loss": 0.3191, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.4508780256288562e-06, |
|
"loss": 0.4392, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.441385856668249e-06, |
|
"loss": 0.2072, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.4318936877076414e-06, |
|
"loss": 0.193, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.422401518747034e-06, |
|
"loss": 0.3453, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.4129093497864266e-06, |
|
"loss": 0.3169, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.403417180825819e-06, |
|
"loss": 0.3616, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3939250118652114e-06, |
|
"loss": 0.2884, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.384432842904604e-06, |
|
"loss": 0.5743, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3749406739439966e-06, |
|
"loss": 0.3009, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.3654485049833888e-06, |
|
"loss": 0.4973, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.3559563360227814e-06, |
|
"loss": 0.3357, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.346464167062174e-06, |
|
"loss": 0.3922, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.3369719981015665e-06, |
|
"loss": 0.3381, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.327479829140959e-06, |
|
"loss": 0.2991, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.3179876601803513e-06, |
|
"loss": 0.4372, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.308495491219744e-06, |
|
"loss": 0.2993, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.2990033222591365e-06, |
|
"loss": 0.3867, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.289511153298529e-06, |
|
"loss": 0.4566, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.2800189843379213e-06, |
|
"loss": 0.2101, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.270526815377314e-06, |
|
"loss": 0.2875, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.2610346464167065e-06, |
|
"loss": 0.4097, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.251542477456099e-06, |
|
"loss": 0.4438, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.2420503084954912e-06, |
|
"loss": 0.4007, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.232558139534884e-06, |
|
"loss": 0.3578, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.2230659705742764e-06, |
|
"loss": 0.4336, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.213573801613669e-06, |
|
"loss": 0.6333, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.2040816326530616e-06, |
|
"loss": 0.1238, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.194589463692454e-06, |
|
"loss": 0.5357, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.1850972947318464e-06, |
|
"loss": 0.2623, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.1756051257712386e-06, |
|
"loss": 0.2794, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.1661129568106316e-06, |
|
"loss": 0.489, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.1566207878500238e-06, |
|
"loss": 0.3155, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.1471286188894164e-06, |
|
"loss": 0.2944, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.137636449928809e-06, |
|
"loss": 0.4045, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.1281442809682016e-06, |
|
"loss": 0.3847, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.118652112007594e-06, |
|
"loss": 0.2581, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.1091599430469863e-06, |
|
"loss": 0.1977, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.099667774086379e-06, |
|
"loss": 0.1456, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.090175605125771e-06, |
|
"loss": 0.3576, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.080683436165164e-06, |
|
"loss": 0.1775, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.0711912672045563e-06, |
|
"loss": 0.153, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.061699098243949e-06, |
|
"loss": 0.4822, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.0522069292833415e-06, |
|
"loss": 0.3706, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.042714760322734e-06, |
|
"loss": 0.5487, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.0332225913621267e-06, |
|
"loss": 0.1947, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.3472154438495636, |
|
"eval_runtime": 210.133, |
|
"eval_samples_per_second": 9.618, |
|
"eval_steps_per_second": 2.408, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.023730422401519e-06, |
|
"loss": 0.3501, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.0142382534409114e-06, |
|
"loss": 0.3572, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.0047460844803036e-06, |
|
"loss": 0.4292, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.9952539155196966e-06, |
|
"loss": 0.202, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.985761746559089e-06, |
|
"loss": 0.5637, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.9762695775984814e-06, |
|
"loss": 0.5057, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.966777408637874e-06, |
|
"loss": 0.2232, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9572852396772666e-06, |
|
"loss": 0.3422, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9477930707166588e-06, |
|
"loss": 0.282, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9383009017560514e-06, |
|
"loss": 0.6693, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.928808732795444e-06, |
|
"loss": 0.3485, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.9193165638348366e-06, |
|
"loss": 0.5767, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.909824394874229e-06, |
|
"loss": 0.3807, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.9003322259136213e-06, |
|
"loss": 0.2077, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.890840056953014e-06, |
|
"loss": 0.2414, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8813478879924063e-06, |
|
"loss": 0.3187, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.8718557190317991e-06, |
|
"loss": 0.5724, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8623635500711915e-06, |
|
"loss": 0.55, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8528713811105839e-06, |
|
"loss": 0.386, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.8433792121499763e-06, |
|
"loss": 0.1704, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.833887043189369e-06, |
|
"loss": 0.5093, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.8243948742287615e-06, |
|
"loss": 0.2857, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.8149027052681538e-06, |
|
"loss": 0.4402, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.8054105363075464e-06, |
|
"loss": 0.3786, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.7959183673469388e-06, |
|
"loss": 0.443, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.7864261983863314e-06, |
|
"loss": 0.1684, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.776934029425724e-06, |
|
"loss": 0.4989, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.7674418604651164e-06, |
|
"loss": 0.3821, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.7579496915045088e-06, |
|
"loss": 0.3299, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.7484575225439016e-06, |
|
"loss": 0.4878, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.738965353583294e-06, |
|
"loss": 0.3214, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.7294731846226864e-06, |
|
"loss": 0.3999, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.719981015662079e-06, |
|
"loss": 0.2993, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.7104888467014713e-06, |
|
"loss": 0.415, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.700996677740864e-06, |
|
"loss": 0.29, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.6915045087802565e-06, |
|
"loss": 0.3722, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.682012339819649e-06, |
|
"loss": 0.3986, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.6725201708590413e-06, |
|
"loss": 0.2658, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.6630280018984341e-06, |
|
"loss": 0.4891, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.6535358329378265e-06, |
|
"loss": 0.3952, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.6440436639772189e-06, |
|
"loss": 0.1337, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.6345514950166113e-06, |
|
"loss": 0.358, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.6250593260560039e-06, |
|
"loss": 0.4691, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.6155671570953965e-06, |
|
"loss": 0.3327, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.606074988134789e-06, |
|
"loss": 0.5424, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.5965828191741814e-06, |
|
"loss": 0.4439, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.5870906502135738e-06, |
|
"loss": 0.7304, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.5775984812529666e-06, |
|
"loss": 0.2145, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.568106312292359e-06, |
|
"loss": 0.33, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.5586141433317514e-06, |
|
"loss": 0.4144, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 0.3469783067703247, |
|
"eval_runtime": 210.9536, |
|
"eval_samples_per_second": 9.58, |
|
"eval_steps_per_second": 2.399, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.5491219743711438e-06, |
|
"loss": 0.4365, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.5396298054105364e-06, |
|
"loss": 0.306, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.530137636449929e-06, |
|
"loss": 0.5146, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.5206454674893214e-06, |
|
"loss": 0.2689, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.511153298528714e-06, |
|
"loss": 0.2034, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.5016611295681064e-06, |
|
"loss": 0.4136, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.492168960607499e-06, |
|
"loss": 0.2729, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.4826767916468915e-06, |
|
"loss": 0.6817, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.473184622686284e-06, |
|
"loss": 0.3407, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4636924537256763e-06, |
|
"loss": 0.4404, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4542002847650687e-06, |
|
"loss": 0.4365, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.4447081158044615e-06, |
|
"loss": 0.318, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.435215946843854e-06, |
|
"loss": 0.7077, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.4257237778832465e-06, |
|
"loss": 0.4964, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.4162316089226389e-06, |
|
"loss": 0.5746, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.4067394399620315e-06, |
|
"loss": 0.349, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.397247271001424e-06, |
|
"loss": 0.4112, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3877551020408165e-06, |
|
"loss": 0.3403, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3782629330802088e-06, |
|
"loss": 0.4245, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.3687707641196012e-06, |
|
"loss": 0.473, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.359278595158994e-06, |
|
"loss": 0.8119, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.3497864261983864e-06, |
|
"loss": 0.435, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.3402942572377788e-06, |
|
"loss": 0.2139, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.3308020882771714e-06, |
|
"loss": 0.4287, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.321309919316564e-06, |
|
"loss": 0.2869, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.3118177503559566e-06, |
|
"loss": 0.1564, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.302325581395349e-06, |
|
"loss": 0.4634, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.2928334124347414e-06, |
|
"loss": 0.1519, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.2833412434741342e-06, |
|
"loss": 0.3904, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.2738490745135266e-06, |
|
"loss": 0.227, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.264356905552919e-06, |
|
"loss": 0.5029, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.2548647365923113e-06, |
|
"loss": 0.4979, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.245372567631704e-06, |
|
"loss": 0.4358, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.2358803986710965e-06, |
|
"loss": 0.304, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.226388229710489e-06, |
|
"loss": 0.6384, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.2168960607498815e-06, |
|
"loss": 0.1619, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.207403891789274e-06, |
|
"loss": 0.2769, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.1979117228286665e-06, |
|
"loss": 0.3026, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.188419553868059e-06, |
|
"loss": 0.3886, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.1789273849074515e-06, |
|
"loss": 0.1441, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.1694352159468438e-06, |
|
"loss": 0.3779, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.1599430469862364e-06, |
|
"loss": 0.3901, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.1504508780256288e-06, |
|
"loss": 0.2091, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.1409587090650214e-06, |
|
"loss": 0.3605, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.131466540104414e-06, |
|
"loss": 0.416, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.1219743711438064e-06, |
|
"loss": 0.2775, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.112482202183199e-06, |
|
"loss": 0.4789, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.1029900332225916e-06, |
|
"loss": 0.4778, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.093497864261984e-06, |
|
"loss": 0.3076, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.0840056953013764e-06, |
|
"loss": 0.3726, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_loss": 0.34603193402290344, |
|
"eval_runtime": 209.9907, |
|
"eval_samples_per_second": 9.624, |
|
"eval_steps_per_second": 2.41, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.074513526340769e-06, |
|
"loss": 0.4906, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.0650213573801613e-06, |
|
"loss": 0.3415, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.055529188419554e-06, |
|
"loss": 0.2651, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.0460370194589463e-06, |
|
"loss": 0.3119, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.036544850498339e-06, |
|
"loss": 0.3054, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.0270526815377315e-06, |
|
"loss": 0.3226, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.0175605125771241e-06, |
|
"loss": 0.5005, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.0080683436165165e-06, |
|
"loss": 0.2753, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.985761746559089e-07, |
|
"loss": 0.2687, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.890840056953015e-07, |
|
"loss": 0.3084, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.795918367346939e-07, |
|
"loss": 0.3602, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.700996677740865e-07, |
|
"loss": 0.3026, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.606074988134788e-07, |
|
"loss": 0.5057, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.511153298528716e-07, |
|
"loss": 0.3735, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.416231608922639e-07, |
|
"loss": 0.5268, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.321309919316565e-07, |
|
"loss": 0.4173, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.226388229710489e-07, |
|
"loss": 0.3529, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.131466540104414e-07, |
|
"loss": 0.3961, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.03654485049834e-07, |
|
"loss": 0.4331, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 8.941623160892264e-07, |
|
"loss": 0.3743, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.84670147128619e-07, |
|
"loss": 0.2121, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.751779781680114e-07, |
|
"loss": 0.4233, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.65685809207404e-07, |
|
"loss": 0.3716, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.561936402467965e-07, |
|
"loss": 0.275, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.467014712861891e-07, |
|
"loss": 0.2921, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.372093023255814e-07, |
|
"loss": 0.3295, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.27717133364974e-07, |
|
"loss": 0.4326, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.182249644043664e-07, |
|
"loss": 0.4383, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.087327954437589e-07, |
|
"loss": 0.3254, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.992406264831515e-07, |
|
"loss": 0.3985, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.897484575225439e-07, |
|
"loss": 0.4201, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.802562885619365e-07, |
|
"loss": 0.15, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.70764119601329e-07, |
|
"loss": 0.5237, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.612719506407215e-07, |
|
"loss": 0.3785, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.51779781680114e-07, |
|
"loss": 0.508, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.422876127195066e-07, |
|
"loss": 0.3134, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.327954437588989e-07, |
|
"loss": 0.4777, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.233032747982914e-07, |
|
"loss": 0.2723, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.13811105837684e-07, |
|
"loss": 0.2647, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.043189368770764e-07, |
|
"loss": 0.4874, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.94826767916469e-07, |
|
"loss": 0.0944, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.853345989558614e-07, |
|
"loss": 0.2228, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.75842429995254e-07, |
|
"loss": 0.3151, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.663502610346465e-07, |
|
"loss": 0.3432, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.568580920740391e-07, |
|
"loss": 0.4183, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.473659231134315e-07, |
|
"loss": 0.2398, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.37873754152824e-07, |
|
"loss": 0.2553, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.283815851922164e-07, |
|
"loss": 0.2551, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 6.18889416231609e-07, |
|
"loss": 0.4634, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 6.093972472710015e-07, |
|
"loss": 0.418, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 0.34635499119758606, |
|
"eval_runtime": 210.2528, |
|
"eval_samples_per_second": 9.612, |
|
"eval_steps_per_second": 2.407, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.99905078310394e-07, |
|
"loss": 0.3378, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.904129093497864e-07, |
|
"loss": 0.3378, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.809207403891789e-07, |
|
"loss": 0.1693, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.714285714285715e-07, |
|
"loss": 0.3012, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.61936402467964e-07, |
|
"loss": 0.2447, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.524442335073565e-07, |
|
"loss": 0.2869, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.42952064546749e-07, |
|
"loss": 0.3656, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.334598955861415e-07, |
|
"loss": 0.1434, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.239677266255339e-07, |
|
"loss": 0.1777, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.144755576649265e-07, |
|
"loss": 0.5208, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.04983388704319e-07, |
|
"loss": 0.5794, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.954912197437114e-07, |
|
"loss": 0.2979, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.859990507831039e-07, |
|
"loss": 0.2351, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.7650688182249645e-07, |
|
"loss": 0.2441, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.67014712861889e-07, |
|
"loss": 0.3115, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.575225439012815e-07, |
|
"loss": 0.2808, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.48030374940674e-07, |
|
"loss": 0.2838, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.385382059800665e-07, |
|
"loss": 0.2347, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.29046037019459e-07, |
|
"loss": 0.4912, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.195538680588515e-07, |
|
"loss": 0.2631, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.1006169909824394e-07, |
|
"loss": 0.2965, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.0056953013763643e-07, |
|
"loss": 0.3917, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.91077361177029e-07, |
|
"loss": 0.5057, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.8158519221642146e-07, |
|
"loss": 0.4255, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.7209302325581396e-07, |
|
"loss": 0.2203, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.626008542952065e-07, |
|
"loss": 0.4773, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.53108685334599e-07, |
|
"loss": 0.2554, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.436165163739915e-07, |
|
"loss": 0.389, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.34124347413384e-07, |
|
"loss": 0.2402, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.246321784527765e-07, |
|
"loss": 0.447, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.1514000949216895e-07, |
|
"loss": 0.3556, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.056478405315615e-07, |
|
"loss": 0.3357, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.96155671570954e-07, |
|
"loss": 0.5372, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.866635026103465e-07, |
|
"loss": 0.3488, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.7717133364973897e-07, |
|
"loss": 0.4506, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.676791646891315e-07, |
|
"loss": 0.3523, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.58186995728524e-07, |
|
"loss": 0.3725, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.486948267679165e-07, |
|
"loss": 0.5271, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.39202657807309e-07, |
|
"loss": 0.2397, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.297104888467015e-07, |
|
"loss": 0.1559, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.20218319886094e-07, |
|
"loss": 0.4188, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.107261509254865e-07, |
|
"loss": 0.5267, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.0123398196487897e-07, |
|
"loss": 0.5628, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.917418130042715e-07, |
|
"loss": 0.4105, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.82249644043664e-07, |
|
"loss": 0.2813, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.727574750830565e-07, |
|
"loss": 0.3071, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.6326530612244901e-07, |
|
"loss": 0.3369, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.537731371618415e-07, |
|
"loss": 0.1922, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.44280968201234e-07, |
|
"loss": 0.461, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.3478879924062649e-07, |
|
"loss": 0.3687, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 0.34634825587272644, |
|
"eval_runtime": 209.8305, |
|
"eval_samples_per_second": 9.632, |
|
"eval_steps_per_second": 2.411, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.115502904658845e-06, |
|
"loss": 0.4669, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.109807495158902e-06, |
|
"loss": 0.3283, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.104112085658959e-06, |
|
"loss": 0.2859, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.098416676159016e-06, |
|
"loss": 0.2623, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.0927212666590734e-06, |
|
"loss": 0.3267, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.08702585715913e-06, |
|
"loss": 0.278, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.081330447659187e-06, |
|
"loss": 0.487, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.075635038159244e-06, |
|
"loss": 0.3299, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.069939628659301e-06, |
|
"loss": 0.4466, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.064244219159358e-06, |
|
"loss": 0.2438, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.058548809659415e-06, |
|
"loss": 0.6179, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.052853400159472e-06, |
|
"loss": 0.3405, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.047157990659529e-06, |
|
"loss": 0.344, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.041462581159586e-06, |
|
"loss": 0.3586, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.035767171659642e-06, |
|
"loss": 0.1847, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.0300717621597e-06, |
|
"loss": 0.3832, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.024376352659757e-06, |
|
"loss": 0.2131, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.018680943159813e-06, |
|
"loss": 0.3245, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.01298553365987e-06, |
|
"loss": 0.5644, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.007290124159927e-06, |
|
"loss": 0.1613, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.001594714659984e-06, |
|
"loss": 0.3756, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.9958993051600414e-06, |
|
"loss": 0.1069, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.990203895660098e-06, |
|
"loss": 0.4927, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.9845084861601555e-06, |
|
"loss": 0.386, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.9788130766602126e-06, |
|
"loss": 0.3843, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.973117667160269e-06, |
|
"loss": 0.2719, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.967422257660326e-06, |
|
"loss": 0.4034, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.961726848160383e-06, |
|
"loss": 0.3937, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.95603143866044e-06, |
|
"loss": 0.4995, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.950336029160497e-06, |
|
"loss": 0.2434, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.944640619660554e-06, |
|
"loss": 0.3839, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.938945210160611e-06, |
|
"loss": 0.3355, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.933249800660668e-06, |
|
"loss": 0.2876, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.927554391160725e-06, |
|
"loss": 0.4364, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.921858981660781e-06, |
|
"loss": 0.2628, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.916163572160839e-06, |
|
"loss": 0.3119, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.910468162660896e-06, |
|
"loss": 0.1733, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.904772753160952e-06, |
|
"loss": 0.2827, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.899077343661009e-06, |
|
"loss": 0.1766, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.8933819341610665e-06, |
|
"loss": 0.2672, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.8876865246611235e-06, |
|
"loss": 0.3424, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.8819911151611805e-06, |
|
"loss": 0.4487, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.876295705661237e-06, |
|
"loss": 0.4172, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.870600296161295e-06, |
|
"loss": 0.3854, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.864904886661352e-06, |
|
"loss": 0.3686, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.859209477161408e-06, |
|
"loss": 0.2533, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.853514067661465e-06, |
|
"loss": 0.1369, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.847818658161522e-06, |
|
"loss": 0.4269, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.842123248661579e-06, |
|
"loss": 0.2595, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.836427839161636e-06, |
|
"loss": 0.2355, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 0.34977805614471436, |
|
"eval_runtime": 212.3154, |
|
"eval_samples_per_second": 9.519, |
|
"eval_steps_per_second": 2.383, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.830732429661693e-06, |
|
"loss": 0.3466, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.82503702016175e-06, |
|
"loss": 0.1506, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.819341610661807e-06, |
|
"loss": 0.2931, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.813646201161864e-06, |
|
"loss": 0.5066, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.807950791661921e-06, |
|
"loss": 0.3229, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.802255382161978e-06, |
|
"loss": 0.2496, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.7965599726620344e-06, |
|
"loss": 0.4409, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.7908645631620915e-06, |
|
"loss": 0.3241, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.785169153662149e-06, |
|
"loss": 0.2519, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.7794737441622056e-06, |
|
"loss": 0.4325, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.7737783346622626e-06, |
|
"loss": 0.3483, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.7680829251623196e-06, |
|
"loss": 0.3515, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.7623875156623763e-06, |
|
"loss": 0.4152, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.7566921061624333e-06, |
|
"loss": 0.3937, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.7509966966624903e-06, |
|
"loss": 0.4958, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.745301287162547e-06, |
|
"loss": 0.3978, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.7396058776626044e-06, |
|
"loss": 0.3547, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.7339104681626615e-06, |
|
"loss": 0.1469, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.728215058662718e-06, |
|
"loss": 0.4382, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.722519649162775e-06, |
|
"loss": 0.234, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.7168242396628317e-06, |
|
"loss": 0.3652, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.7111288301628888e-06, |
|
"loss": 0.2035, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.7054334206629462e-06, |
|
"loss": 0.1942, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.699738011163003e-06, |
|
"loss": 0.6065, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.69404260166306e-06, |
|
"loss": 0.3618, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.688347192163117e-06, |
|
"loss": 0.3662, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.6826517826631736e-06, |
|
"loss": 0.2511, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.6769563731632306e-06, |
|
"loss": 0.3071, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.671260963663288e-06, |
|
"loss": 0.4005, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.6655655541633447e-06, |
|
"loss": 0.3301, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.6598701446634017e-06, |
|
"loss": 0.3304, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.6541747351634588e-06, |
|
"loss": 0.4917, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.6484793256635154e-06, |
|
"loss": 0.4638, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.6427839161635724e-06, |
|
"loss": 0.3115, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.63708850666363e-06, |
|
"loss": 0.322, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.631393097163686e-06, |
|
"loss": 0.365, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.6256976876637435e-06, |
|
"loss": 0.361, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.6200022781638e-06, |
|
"loss": 0.3455, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.614306868663857e-06, |
|
"loss": 0.3239, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.6086114591639142e-06, |
|
"loss": 0.229, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.602916049663971e-06, |
|
"loss": 0.3363, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.597220640164028e-06, |
|
"loss": 0.3821, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.5915252306640854e-06, |
|
"loss": 0.5456, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.585829821164142e-06, |
|
"loss": 0.3119, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.580134411664199e-06, |
|
"loss": 0.1574, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.574439002164256e-06, |
|
"loss": 0.4289, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.5687435926643127e-06, |
|
"loss": 0.5156, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.5630481831643697e-06, |
|
"loss": 0.2907, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.557352773664427e-06, |
|
"loss": 0.4394, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.5516573641644838e-06, |
|
"loss": 0.4248, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"eval_loss": 0.3501090109348297, |
|
"eval_runtime": 212.2293, |
|
"eval_samples_per_second": 9.523, |
|
"eval_steps_per_second": 2.384, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 3.545961954664541e-06, |
|
"loss": 0.3417, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.540266545164598e-06, |
|
"loss": 0.4421, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.5345711356646545e-06, |
|
"loss": 0.0987, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.5288757261647115e-06, |
|
"loss": 0.2601, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.523180316664768e-06, |
|
"loss": 0.5805, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.517484907164825e-06, |
|
"loss": 0.3845, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.5117894976648826e-06, |
|
"loss": 0.5719, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.5060940881649393e-06, |
|
"loss": 0.2547, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.5003986786649963e-06, |
|
"loss": 0.4258, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.4947032691650533e-06, |
|
"loss": 0.3509, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.48900785966511e-06, |
|
"loss": 0.4785, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.483312450165167e-06, |
|
"loss": 0.3351, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.4776170406652245e-06, |
|
"loss": 0.319, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.471921631165281e-06, |
|
"loss": 0.5243, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.466226221665338e-06, |
|
"loss": 0.3207, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.460530812165395e-06, |
|
"loss": 0.3911, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.4548354026654518e-06, |
|
"loss": 0.4672, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.449139993165509e-06, |
|
"loss": 0.3416, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.4434445836655663e-06, |
|
"loss": 0.1996, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.437749174165623e-06, |
|
"loss": 0.2258, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.43205376466568e-06, |
|
"loss": 0.6087, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.4263583551657366e-06, |
|
"loss": 0.5061, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.4206629456657936e-06, |
|
"loss": 0.318, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.4149675361658506e-06, |
|
"loss": 0.2787, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.4092721266659073e-06, |
|
"loss": 0.2786, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.4035767171659643e-06, |
|
"loss": 0.3459, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.3978813076660218e-06, |
|
"loss": 0.3916, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.3921858981660784e-06, |
|
"loss": 0.485, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.3864904886661354e-06, |
|
"loss": 0.1479, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.3807950791661925e-06, |
|
"loss": 0.2049, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.375099669666249e-06, |
|
"loss": 0.2876, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.369404260166306e-06, |
|
"loss": 0.4562, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.3637088506663636e-06, |
|
"loss": 0.3319, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.35801344116642e-06, |
|
"loss": 0.442, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.3523180316664772e-06, |
|
"loss": 0.3675, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.346622622166534e-06, |
|
"loss": 0.3927, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.340927212666591e-06, |
|
"loss": 0.6269, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.335231803166648e-06, |
|
"loss": 0.1756, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.3295363936667045e-06, |
|
"loss": 0.4247, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.323840984166762e-06, |
|
"loss": 0.3975, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.318145574666819e-06, |
|
"loss": 0.4755, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.3124501651668757e-06, |
|
"loss": 0.379, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.3067547556669327e-06, |
|
"loss": 0.3689, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.3010593461669897e-06, |
|
"loss": 0.2979, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.2953639366670464e-06, |
|
"loss": 0.455, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.2896685271671034e-06, |
|
"loss": 0.4162, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.283973117667161e-06, |
|
"loss": 0.4755, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.2782777081672175e-06, |
|
"loss": 0.3042, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.2725822986672745e-06, |
|
"loss": 0.286, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.2668868891673316e-06, |
|
"loss": 0.4205, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 0.3515862822532654, |
|
"eval_runtime": 210.7037, |
|
"eval_samples_per_second": 9.592, |
|
"eval_steps_per_second": 2.401, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.261191479667388e-06, |
|
"loss": 0.4638, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.2554960701674452e-06, |
|
"loss": 0.4955, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.249800660667502e-06, |
|
"loss": 0.3171, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.2441052511675593e-06, |
|
"loss": 0.3157, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.2384098416676163e-06, |
|
"loss": 0.4029, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.232714432167673e-06, |
|
"loss": 0.6141, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.22701902266773e-06, |
|
"loss": 0.5261, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.221323613167787e-06, |
|
"loss": 0.4287, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.2156282036678437e-06, |
|
"loss": 0.3101, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.209932794167901e-06, |
|
"loss": 0.4954, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.204237384667958e-06, |
|
"loss": 0.4279, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 3.1985419751680148e-06, |
|
"loss": 0.1931, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 3.192846565668072e-06, |
|
"loss": 0.4763, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 3.187151156168129e-06, |
|
"loss": 0.1875, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.1814557466681855e-06, |
|
"loss": 0.1963, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.175760337168243e-06, |
|
"loss": 0.3605, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.1700649276683e-06, |
|
"loss": 0.2117, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.1643695181683566e-06, |
|
"loss": 0.4029, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 3.1586741086684136e-06, |
|
"loss": 0.3201, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 3.1529786991684702e-06, |
|
"loss": 0.4943, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 3.1472832896685273e-06, |
|
"loss": 0.459, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.1415878801685843e-06, |
|
"loss": 0.1145, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.135892470668641e-06, |
|
"loss": 0.3262, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.1301970611686984e-06, |
|
"loss": 0.3552, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.1245016516687554e-06, |
|
"loss": 0.3953, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.118806242168812e-06, |
|
"loss": 0.2188, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.113110832668869e-06, |
|
"loss": 0.2279, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.107415423168926e-06, |
|
"loss": 0.398, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.1017200136689828e-06, |
|
"loss": 0.4347, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.0960246041690402e-06, |
|
"loss": 0.232, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.0903291946690973e-06, |
|
"loss": 0.2781, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.084633785169154e-06, |
|
"loss": 0.3938, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.078938375669211e-06, |
|
"loss": 0.5736, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.073242966169268e-06, |
|
"loss": 0.327, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.0675475566693246e-06, |
|
"loss": 0.2571, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.061852147169382e-06, |
|
"loss": 0.3868, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.0561567376694382e-06, |
|
"loss": 0.3174, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.0504613281694957e-06, |
|
"loss": 0.2424, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.0447659186695527e-06, |
|
"loss": 0.2545, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.0390705091696094e-06, |
|
"loss": 0.4465, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.0333750996696664e-06, |
|
"loss": 0.304, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.0276796901697234e-06, |
|
"loss": 0.3063, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.02198428066978e-06, |
|
"loss": 0.3685, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.0162888711698375e-06, |
|
"loss": 0.4217, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.0105934616698946e-06, |
|
"loss": 0.3801, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.004898052169951e-06, |
|
"loss": 0.2402, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.9992026426700082e-06, |
|
"loss": 0.3375, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.9935072331700653e-06, |
|
"loss": 0.4198, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.987811823670122e-06, |
|
"loss": 0.4481, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 2.9821164141701793e-06, |
|
"loss": 0.1827, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"eval_loss": 0.3478640913963318, |
|
"eval_runtime": 211.175, |
|
"eval_samples_per_second": 9.57, |
|
"eval_steps_per_second": 2.396, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.976421004670236e-06, |
|
"loss": 0.2672, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.970725595170293e-06, |
|
"loss": 0.3767, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.96503018567035e-06, |
|
"loss": 0.356, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.9593347761704066e-06, |
|
"loss": 0.2137, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.9536393666704637e-06, |
|
"loss": 0.156, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.947943957170521e-06, |
|
"loss": 0.2804, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.9422485476705773e-06, |
|
"loss": 0.346, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.936553138170635e-06, |
|
"loss": 0.3009, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.930857728670692e-06, |
|
"loss": 0.3055, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 2.9251623191707485e-06, |
|
"loss": 0.4737, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.9194669096708055e-06, |
|
"loss": 0.214, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.9137715001708625e-06, |
|
"loss": 0.3206, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.908076090670919e-06, |
|
"loss": 0.5329, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.9023806811709766e-06, |
|
"loss": 0.1631, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.8966852716710337e-06, |
|
"loss": 0.3896, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.8909898621710903e-06, |
|
"loss": 0.7929, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.8852944526711473e-06, |
|
"loss": 0.3773, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 2.879599043171204e-06, |
|
"loss": 0.2049, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.873903633671261e-06, |
|
"loss": 0.3991, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.8682082241713184e-06, |
|
"loss": 0.3699, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.862512814671375e-06, |
|
"loss": 0.5049, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.856817405171432e-06, |
|
"loss": 0.727, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.851121995671489e-06, |
|
"loss": 0.4813, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.8454265861715458e-06, |
|
"loss": 0.2857, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 2.839731176671603e-06, |
|
"loss": 0.3309, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.8340357671716603e-06, |
|
"loss": 0.5852, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.8283403576717165e-06, |
|
"loss": 0.5285, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.822644948171774e-06, |
|
"loss": 0.3123, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.816949538671831e-06, |
|
"loss": 0.3655, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.8112541291718876e-06, |
|
"loss": 0.227, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.8055587196719446e-06, |
|
"loss": 0.188, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.7998633101720017e-06, |
|
"loss": 0.4075, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.7941679006720583e-06, |
|
"loss": 0.3146, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.7884724911721157e-06, |
|
"loss": 0.2311, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.7827770816721724e-06, |
|
"loss": 0.3325, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.7770816721722294e-06, |
|
"loss": 0.179, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.7713862626722864e-06, |
|
"loss": 0.3413, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.765690853172343e-06, |
|
"loss": 0.4583, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 2.7599954436724e-06, |
|
"loss": 0.4093, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.7543000341724576e-06, |
|
"loss": 0.321, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.748604624672514e-06, |
|
"loss": 0.2316, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.742909215172571e-06, |
|
"loss": 0.2259, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.7372138056726283e-06, |
|
"loss": 0.4776, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.731518396172685e-06, |
|
"loss": 0.1712, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.725822986672742e-06, |
|
"loss": 0.3603, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 2.7201275771727994e-06, |
|
"loss": 0.4242, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 2.714432167672856e-06, |
|
"loss": 0.3223, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 2.708736758172913e-06, |
|
"loss": 0.3738, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 2.70304134867297e-06, |
|
"loss": 0.1973, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.6973459391730267e-06, |
|
"loss": 0.3688, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 0.34860894083976746, |
|
"eval_runtime": 211.4299, |
|
"eval_samples_per_second": 9.559, |
|
"eval_steps_per_second": 2.393, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.6916505296730837e-06, |
|
"loss": 0.5827, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.6859551201731403e-06, |
|
"loss": 0.4093, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.6802597106731974e-06, |
|
"loss": 0.2026, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.674564301173255e-06, |
|
"loss": 0.3811, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.6688688916733115e-06, |
|
"loss": 0.4959, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.6631734821733685e-06, |
|
"loss": 0.2947, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 2.6574780726734255e-06, |
|
"loss": 0.3327, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 2.651782663173482e-06, |
|
"loss": 0.4354, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 2.646087253673539e-06, |
|
"loss": 0.4678, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 2.6403918441735967e-06, |
|
"loss": 0.2031, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.6346964346736533e-06, |
|
"loss": 0.4424, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.6290010251737103e-06, |
|
"loss": 0.3075, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.6233056156737674e-06, |
|
"loss": 0.2436, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.617610206173824e-06, |
|
"loss": 0.2671, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.611914796673881e-06, |
|
"loss": 0.2847, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.6062193871739376e-06, |
|
"loss": 0.519, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 2.600523977673995e-06, |
|
"loss": 0.3778, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.594828568174052e-06, |
|
"loss": 0.3399, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.5891331586741088e-06, |
|
"loss": 0.296, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.583437749174166e-06, |
|
"loss": 0.4316, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 2.577742339674223e-06, |
|
"loss": 0.3023, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.5720469301742795e-06, |
|
"loss": 0.2552, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.5663515206743365e-06, |
|
"loss": 0.3943, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.560656111174394e-06, |
|
"loss": 0.4276, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.5549607016744506e-06, |
|
"loss": 0.4096, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.5492652921745076e-06, |
|
"loss": 0.3047, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.5435698826745647e-06, |
|
"loss": 0.4829, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.5378744731746213e-06, |
|
"loss": 0.3925, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.5321790636746783e-06, |
|
"loss": 0.2451, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.5264836541747358e-06, |
|
"loss": 0.3625, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.5207882446747924e-06, |
|
"loss": 0.26, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 2.5150928351748494e-06, |
|
"loss": 0.4886, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 2.509397425674906e-06, |
|
"loss": 0.3842, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 2.503702016174963e-06, |
|
"loss": 0.5277, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 2.49800660667502e-06, |
|
"loss": 0.537, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.492311197175077e-06, |
|
"loss": 0.1046, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.486615787675134e-06, |
|
"loss": 0.3134, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.480920378175191e-06, |
|
"loss": 0.4783, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.475224968675248e-06, |
|
"loss": 0.5872, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.469529559175305e-06, |
|
"loss": 0.3972, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.463834149675362e-06, |
|
"loss": 0.2255, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.458138740175419e-06, |
|
"loss": 0.3646, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.4524433306754756e-06, |
|
"loss": 0.1656, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.4467479211755326e-06, |
|
"loss": 0.5685, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.4410525116755897e-06, |
|
"loss": 0.3274, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 2.4353571021756467e-06, |
|
"loss": 0.3213, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.4296616926757038e-06, |
|
"loss": 0.2743, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.4239662831757604e-06, |
|
"loss": 0.3187, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.4182708736758174e-06, |
|
"loss": 0.2557, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.4125754641758745e-06, |
|
"loss": 0.1861, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_loss": 0.3507066071033478, |
|
"eval_runtime": 212.1488, |
|
"eval_samples_per_second": 9.526, |
|
"eval_steps_per_second": 2.385, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.4068800546759315e-06, |
|
"loss": 0.3103, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.4011846451759885e-06, |
|
"loss": 0.4539, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.395489235676045e-06, |
|
"loss": 0.5474, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 2.389793826176102e-06, |
|
"loss": 0.3454, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 2.3840984166761592e-06, |
|
"loss": 0.478, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 2.3784030071762163e-06, |
|
"loss": 0.3736, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.3727075976762733e-06, |
|
"loss": 0.3233, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.36701218817633e-06, |
|
"loss": 0.5434, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.361316778676387e-06, |
|
"loss": 0.2229, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 2.355621369176444e-06, |
|
"loss": 0.2059, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.349925959676501e-06, |
|
"loss": 0.254, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.3442305501765577e-06, |
|
"loss": 0.3806, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.3385351406766147e-06, |
|
"loss": 0.3837, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.3328397311766718e-06, |
|
"loss": 0.439, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.327144321676729e-06, |
|
"loss": 0.3859, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.321448912176786e-06, |
|
"loss": 0.5599, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 2.315753502676843e-06, |
|
"loss": 0.3513, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 2.3100580931768995e-06, |
|
"loss": 0.3253, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 2.3043626836769565e-06, |
|
"loss": 0.2308, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 2.2986672741770136e-06, |
|
"loss": 0.2583, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.2929718646770706e-06, |
|
"loss": 0.4509, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.2872764551771272e-06, |
|
"loss": 0.3235, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.2815810456771843e-06, |
|
"loss": 0.3594, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.2758856361772413e-06, |
|
"loss": 0.5218, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.2701902266772983e-06, |
|
"loss": 0.2541, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.2644948171773554e-06, |
|
"loss": 0.3263, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.2587994076774124e-06, |
|
"loss": 0.4093, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 2.253103998177469e-06, |
|
"loss": 0.3335, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.247408588677526e-06, |
|
"loss": 0.3746, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.241713179177583e-06, |
|
"loss": 0.4138, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 2.23601776967764e-06, |
|
"loss": 0.4556, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.2303223601776968e-06, |
|
"loss": 0.3204, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.224626950677754e-06, |
|
"loss": 0.2649, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.218931541177811e-06, |
|
"loss": 0.2692, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 2.213236131677868e-06, |
|
"loss": 0.3215, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.2075407221779245e-06, |
|
"loss": 0.4207, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.201845312677982e-06, |
|
"loss": 0.4033, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 2.1961499031780386e-06, |
|
"loss": 0.2342, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2.1904544936780956e-06, |
|
"loss": 0.2611, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2.1847590841781527e-06, |
|
"loss": 0.3371, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2.1790636746782097e-06, |
|
"loss": 0.4576, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 2.1733682651782663e-06, |
|
"loss": 0.4421, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.1676728556783234e-06, |
|
"loss": 0.5129, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.1619774461783804e-06, |
|
"loss": 0.2611, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.1562820366784375e-06, |
|
"loss": 0.4886, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 2.150586627178494e-06, |
|
"loss": 0.1713, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 2.1448912176785515e-06, |
|
"loss": 0.1568, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 2.139195808178608e-06, |
|
"loss": 0.4705, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 2.133500398678665e-06, |
|
"loss": 0.2815, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.1278049891787222e-06, |
|
"loss": 0.2475, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_loss": 0.34873369336128235, |
|
"eval_runtime": 212.3511, |
|
"eval_samples_per_second": 9.517, |
|
"eval_steps_per_second": 2.383, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.1221095796787793e-06, |
|
"loss": 0.4751, |
|
"step": 14010 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.116414170178836e-06, |
|
"loss": 0.4, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.110718760678893e-06, |
|
"loss": 0.4484, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.10502335117895e-06, |
|
"loss": 0.2746, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.099327941679007e-06, |
|
"loss": 0.4018, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 2.0936325321790636e-06, |
|
"loss": 0.2564, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.087937122679121e-06, |
|
"loss": 0.5503, |
|
"step": 14070 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.0822417131791777e-06, |
|
"loss": 0.147, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.0765463036792347e-06, |
|
"loss": 0.1796, |
|
"step": 14090 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.0708508941792914e-06, |
|
"loss": 0.5249, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.065155484679349e-06, |
|
"loss": 0.4368, |
|
"step": 14110 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.0594600751794054e-06, |
|
"loss": 0.325, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 2.0537646656794625e-06, |
|
"loss": 0.2107, |
|
"step": 14130 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.0480692561795195e-06, |
|
"loss": 0.5274, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.0423738466795766e-06, |
|
"loss": 0.3492, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.036678437179633e-06, |
|
"loss": 0.201, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 2.0309830276796906e-06, |
|
"loss": 0.5432, |
|
"step": 14170 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.0252876181797473e-06, |
|
"loss": 0.2668, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.0195922086798043e-06, |
|
"loss": 0.3592, |
|
"step": 14190 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.013896799179861e-06, |
|
"loss": 0.2449, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 2.0082013896799184e-06, |
|
"loss": 0.2965, |
|
"step": 14210 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 2.002505980179975e-06, |
|
"loss": 0.5565, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 1.996810570680032e-06, |
|
"loss": 0.3033, |
|
"step": 14230 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 1.991115161180089e-06, |
|
"loss": 0.3251, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.985419751680146e-06, |
|
"loss": 0.3458, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.9797243421802027e-06, |
|
"loss": 0.1492, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 1.9740289326802598e-06, |
|
"loss": 0.2058, |
|
"step": 14270 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.968333523180317e-06, |
|
"loss": 0.3785, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.962638113680374e-06, |
|
"loss": 0.3519, |
|
"step": 14290 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.9569427041804305e-06, |
|
"loss": 0.4122, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.951247294680488e-06, |
|
"loss": 0.1842, |
|
"step": 14310 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.9455518851805446e-06, |
|
"loss": 0.3497, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.9398564756806016e-06, |
|
"loss": 0.258, |
|
"step": 14330 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.9341610661806586e-06, |
|
"loss": 0.2991, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.9284656566807157e-06, |
|
"loss": 0.3041, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.9227702471807723e-06, |
|
"loss": 0.3762, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.9170748376808293e-06, |
|
"loss": 0.2267, |
|
"step": 14370 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.9113794281808864e-06, |
|
"loss": 0.3088, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 1.9056840186809434e-06, |
|
"loss": 0.163, |
|
"step": 14390 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 1.8999886091810002e-06, |
|
"loss": 0.134, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 1.8942931996810573e-06, |
|
"loss": 0.3025, |
|
"step": 14410 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.8885977901811143e-06, |
|
"loss": 0.3232, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.8829023806811711e-06, |
|
"loss": 0.4234, |
|
"step": 14430 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.877206971181228e-06, |
|
"loss": 0.2133, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.871511561681285e-06, |
|
"loss": 0.5324, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.865816152181342e-06, |
|
"loss": 0.6641, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.8601207426813989e-06, |
|
"loss": 0.2977, |
|
"step": 14470 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.854425333181456e-06, |
|
"loss": 0.3066, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.848729923681513e-06, |
|
"loss": 0.2809, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.8430345141815698e-06, |
|
"loss": 0.2115, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"eval_loss": 0.35345104336738586, |
|
"eval_runtime": 211.5013, |
|
"eval_samples_per_second": 9.555, |
|
"eval_steps_per_second": 2.392, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.8373391046816266e-06, |
|
"loss": 0.4939, |
|
"step": 14510 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.8316436951816839e-06, |
|
"loss": 0.2237, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.8259482856817407e-06, |
|
"loss": 0.0826, |
|
"step": 14530 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.8202528761817975e-06, |
|
"loss": 0.2169, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.8145574666818546e-06, |
|
"loss": 0.6057, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.8088620571819116e-06, |
|
"loss": 0.2384, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.8031666476819684e-06, |
|
"loss": 0.2647, |
|
"step": 14570 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.7974712381820255e-06, |
|
"loss": 0.2658, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.7917758286820825e-06, |
|
"loss": 0.2966, |
|
"step": 14590 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.7860804191821393e-06, |
|
"loss": 0.3193, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.7803850096821962e-06, |
|
"loss": 0.3529, |
|
"step": 14610 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.7746896001822534e-06, |
|
"loss": 0.3283, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.7689941906823103e-06, |
|
"loss": 0.2698, |
|
"step": 14630 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.763298781182367e-06, |
|
"loss": 0.3715, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.7576033716824241e-06, |
|
"loss": 0.3999, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.7519079621824812e-06, |
|
"loss": 0.3378, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.746212552682538e-06, |
|
"loss": 0.215, |
|
"step": 14670 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.7405171431825948e-06, |
|
"loss": 0.3081, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.734821733682652e-06, |
|
"loss": 0.2634, |
|
"step": 14690 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.729126324182709e-06, |
|
"loss": 0.4724, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.7234309146827657e-06, |
|
"loss": 0.294, |
|
"step": 14710 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.717735505182823e-06, |
|
"loss": 0.2399, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.7120400956828798e-06, |
|
"loss": 0.2102, |
|
"step": 14730 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.7063446861829366e-06, |
|
"loss": 0.1503, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.7006492766829935e-06, |
|
"loss": 0.1694, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.6949538671830507e-06, |
|
"loss": 0.4448, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.6892584576831075e-06, |
|
"loss": 0.1987, |
|
"step": 14770 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.6835630481831644e-06, |
|
"loss": 0.3097, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.6778676386832216e-06, |
|
"loss": 0.2567, |
|
"step": 14790 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.6721722291832785e-06, |
|
"loss": 0.2016, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.6664768196833353e-06, |
|
"loss": 0.173, |
|
"step": 14810 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.6607814101833925e-06, |
|
"loss": 0.2601, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.6550860006834494e-06, |
|
"loss": 0.2977, |
|
"step": 14830 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.6493905911835062e-06, |
|
"loss": 0.2837, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.643695181683563e-06, |
|
"loss": 0.3155, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.6379997721836203e-06, |
|
"loss": 0.2774, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.632304362683677e-06, |
|
"loss": 0.0978, |
|
"step": 14870 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.626608953183734e-06, |
|
"loss": 0.4131, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.6209135436837912e-06, |
|
"loss": 0.3686, |
|
"step": 14890 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.615218134183848e-06, |
|
"loss": 0.0879, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.6095227246839048e-06, |
|
"loss": 0.401, |
|
"step": 14910 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.6038273151839617e-06, |
|
"loss": 0.2656, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.598131905684019e-06, |
|
"loss": 0.3608, |
|
"step": 14930 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.5924364961840757e-06, |
|
"loss": 0.3265, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.5867410866841326e-06, |
|
"loss": 0.372, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.5810456771841898e-06, |
|
"loss": 0.302, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.5753502676842467e-06, |
|
"loss": 0.2329, |
|
"step": 14970 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.5696548581843035e-06, |
|
"loss": 0.1317, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 1.5639594486843607e-06, |
|
"loss": 0.5018, |
|
"step": 14990 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 1.5582640391844176e-06, |
|
"loss": 0.4088, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_loss": 0.3560781478881836, |
|
"eval_runtime": 211.6609, |
|
"eval_samples_per_second": 9.548, |
|
"eval_steps_per_second": 2.391, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 1.5525686296844744e-06, |
|
"loss": 0.3594, |
|
"step": 15010 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 1.5468732201845312e-06, |
|
"loss": 0.1288, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.5411778106845885e-06, |
|
"loss": 0.1063, |
|
"step": 15030 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.5354824011846453e-06, |
|
"loss": 0.3444, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.5297869916847021e-06, |
|
"loss": 0.393, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.5240915821847594e-06, |
|
"loss": 0.3674, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.5183961726848162e-06, |
|
"loss": 0.2209, |
|
"step": 15070 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.512700763184873e-06, |
|
"loss": 0.2758, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 1.5070053536849299e-06, |
|
"loss": 0.3258, |
|
"step": 15090 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 1.5013099441849871e-06, |
|
"loss": 0.5878, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 1.495614534685044e-06, |
|
"loss": 0.2565, |
|
"step": 15110 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 1.4899191251851008e-06, |
|
"loss": 0.1671, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.484223715685158e-06, |
|
"loss": 0.3276, |
|
"step": 15130 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.4785283061852149e-06, |
|
"loss": 0.5138, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.4728328966852717e-06, |
|
"loss": 0.4189, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.4671374871853285e-06, |
|
"loss": 0.1441, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.4614420776853858e-06, |
|
"loss": 0.4461, |
|
"step": 15170 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.4557466681854426e-06, |
|
"loss": 0.2924, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.4500512586854994e-06, |
|
"loss": 0.2742, |
|
"step": 15190 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.4443558491855567e-06, |
|
"loss": 0.5659, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.4386604396856135e-06, |
|
"loss": 0.3907, |
|
"step": 15210 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.4329650301856703e-06, |
|
"loss": 0.3276, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.4272696206857276e-06, |
|
"loss": 0.1779, |
|
"step": 15230 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.4215742111857844e-06, |
|
"loss": 0.2407, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.4158788016858412e-06, |
|
"loss": 0.5414, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.410183392185898e-06, |
|
"loss": 0.213, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.4044879826859553e-06, |
|
"loss": 0.3669, |
|
"step": 15270 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.3987925731860122e-06, |
|
"loss": 0.4115, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.393097163686069e-06, |
|
"loss": 0.352, |
|
"step": 15290 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.3874017541861262e-06, |
|
"loss": 0.5727, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.381706344686183e-06, |
|
"loss": 0.3396, |
|
"step": 15310 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.3760109351862399e-06, |
|
"loss": 0.2974, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.370315525686297e-06, |
|
"loss": 0.3642, |
|
"step": 15330 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 1.364620116186354e-06, |
|
"loss": 0.2653, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.3589247066864108e-06, |
|
"loss": 0.3407, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.3532292971864676e-06, |
|
"loss": 0.3, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.3475338876865249e-06, |
|
"loss": 0.2095, |
|
"step": 15370 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.3418384781865817e-06, |
|
"loss": 0.2906, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.3361430686866385e-06, |
|
"loss": 0.263, |
|
"step": 15390 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.3304476591866956e-06, |
|
"loss": 0.2987, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.3247522496867526e-06, |
|
"loss": 0.3188, |
|
"step": 15410 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.3190568401868094e-06, |
|
"loss": 0.6636, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.3133614306868665e-06, |
|
"loss": 0.4354, |
|
"step": 15430 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.3076660211869235e-06, |
|
"loss": 0.2682, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.3019706116869804e-06, |
|
"loss": 0.4007, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.2962752021870372e-06, |
|
"loss": 0.4228, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.2905797926870944e-06, |
|
"loss": 0.2764, |
|
"step": 15470 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.2848843831871513e-06, |
|
"loss": 0.1821, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.279188973687208e-06, |
|
"loss": 0.1724, |
|
"step": 15490 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.2734935641872651e-06, |
|
"loss": 0.502, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 0.3557915985584259, |
|
"eval_runtime": 212.001, |
|
"eval_samples_per_second": 9.533, |
|
"eval_steps_per_second": 2.387, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 1.2677981546873222e-06, |
|
"loss": 0.4079, |
|
"step": 15510 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.262102745187379e-06, |
|
"loss": 0.3792, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.256407335687436e-06, |
|
"loss": 0.2857, |
|
"step": 15530 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.250711926187493e-06, |
|
"loss": 0.2627, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.24501651668755e-06, |
|
"loss": 0.3787, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.239321107187607e-06, |
|
"loss": 0.4017, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.2336256976876638e-06, |
|
"loss": 0.3776, |
|
"step": 15570 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.2279302881877208e-06, |
|
"loss": 0.3595, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.2222348786877776e-06, |
|
"loss": 0.2288, |
|
"step": 15590 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.2165394691878347e-06, |
|
"loss": 0.3921, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.2108440596878917e-06, |
|
"loss": 0.3526, |
|
"step": 15610 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.2051486501879486e-06, |
|
"loss": 0.2754, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.1994532406880056e-06, |
|
"loss": 0.347, |
|
"step": 15630 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.1937578311880624e-06, |
|
"loss": 0.2734, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.1880624216881195e-06, |
|
"loss": 0.3327, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 1.1823670121881765e-06, |
|
"loss": 0.3287, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.1766716026882333e-06, |
|
"loss": 0.3132, |
|
"step": 15670 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.1709761931882904e-06, |
|
"loss": 0.4023, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.1652807836883472e-06, |
|
"loss": 0.3814, |
|
"step": 15690 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.1595853741884042e-06, |
|
"loss": 0.4485, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.1538899646884613e-06, |
|
"loss": 0.286, |
|
"step": 15710 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.1481945551885181e-06, |
|
"loss": 0.6182, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.1424991456885751e-06, |
|
"loss": 0.4019, |
|
"step": 15730 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.136803736188632e-06, |
|
"loss": 0.3184, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.131108326688689e-06, |
|
"loss": 0.4947, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.125412917188746e-06, |
|
"loss": 0.4203, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.1197175076888029e-06, |
|
"loss": 0.1995, |
|
"step": 15770 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.11402209818886e-06, |
|
"loss": 0.2659, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.1083266886889168e-06, |
|
"loss": 0.3046, |
|
"step": 15790 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.1026312791889738e-06, |
|
"loss": 0.1809, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.0969358696890308e-06, |
|
"loss": 0.2639, |
|
"step": 15810 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.0912404601890877e-06, |
|
"loss": 0.272, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.0855450506891447e-06, |
|
"loss": 0.6332, |
|
"step": 15830 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.0798496411892015e-06, |
|
"loss": 0.3555, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.0741542316892586e-06, |
|
"loss": 0.271, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.0684588221893156e-06, |
|
"loss": 0.1772, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.0627634126893724e-06, |
|
"loss": 0.4972, |
|
"step": 15870 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.0570680031894295e-06, |
|
"loss": 0.3808, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.0513725936894863e-06, |
|
"loss": 0.2823, |
|
"step": 15890 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 1.0456771841895433e-06, |
|
"loss": 0.3016, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.0399817746896004e-06, |
|
"loss": 0.4662, |
|
"step": 15910 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.0342863651896572e-06, |
|
"loss": 0.3594, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.0285909556897143e-06, |
|
"loss": 0.2838, |
|
"step": 15930 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.022895546189771e-06, |
|
"loss": 0.3706, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.0172001366898281e-06, |
|
"loss": 0.4332, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.0115047271898852e-06, |
|
"loss": 0.2483, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.005809317689942e-06, |
|
"loss": 0.2208, |
|
"step": 15970 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.000113908189999e-06, |
|
"loss": 0.3334, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 9.944184986900559e-07, |
|
"loss": 0.298, |
|
"step": 15990 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 9.88723089190113e-07, |
|
"loss": 0.3099, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 0.3557519316673279, |
|
"eval_runtime": 212.3782, |
|
"eval_samples_per_second": 9.516, |
|
"eval_steps_per_second": 2.383, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 9.8302767969017e-07, |
|
"loss": 0.4163, |
|
"step": 16010 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 9.773322701902268e-07, |
|
"loss": 0.1947, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 9.716368606902838e-07, |
|
"loss": 0.3316, |
|
"step": 16030 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 9.659414511903406e-07, |
|
"loss": 0.1959, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 9.602460416903977e-07, |
|
"loss": 0.2092, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 9.545506321904547e-07, |
|
"loss": 0.1516, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 9.488552226905115e-07, |
|
"loss": 0.5363, |
|
"step": 16070 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 9.431598131905685e-07, |
|
"loss": 0.3025, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 9.374644036906254e-07, |
|
"loss": 0.4779, |
|
"step": 16090 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 9.317689941906824e-07, |
|
"loss": 0.277, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 9.260735846907394e-07, |
|
"loss": 0.1441, |
|
"step": 16110 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 9.203781751907963e-07, |
|
"loss": 0.2157, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 9.146827656908533e-07, |
|
"loss": 0.2452, |
|
"step": 16130 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 9.089873561909102e-07, |
|
"loss": 0.4405, |
|
"step": 16140 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 9.032919466909671e-07, |
|
"loss": 0.4546, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 8.975965371910241e-07, |
|
"loss": 0.4222, |
|
"step": 16160 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 8.919011276910811e-07, |
|
"loss": 0.4103, |
|
"step": 16170 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 8.86205718191138e-07, |
|
"loss": 0.522, |
|
"step": 16180 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 8.80510308691195e-07, |
|
"loss": 0.4269, |
|
"step": 16190 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 8.748148991912519e-07, |
|
"loss": 0.3538, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 8.691194896913088e-07, |
|
"loss": 0.3338, |
|
"step": 16210 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 8.634240801913659e-07, |
|
"loss": 0.2906, |
|
"step": 16220 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.577286706914228e-07, |
|
"loss": 0.3005, |
|
"step": 16230 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.520332611914797e-07, |
|
"loss": 0.5027, |
|
"step": 16240 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.463378516915367e-07, |
|
"loss": 0.255, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 8.406424421915936e-07, |
|
"loss": 0.1749, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.349470326916507e-07, |
|
"loss": 0.2787, |
|
"step": 16270 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.292516231917075e-07, |
|
"loss": 0.2366, |
|
"step": 16280 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 8.235562136917645e-07, |
|
"loss": 0.2687, |
|
"step": 16290 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.178608041918215e-07, |
|
"loss": 0.4187, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.121653946918784e-07, |
|
"loss": 0.1763, |
|
"step": 16310 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.064699851919354e-07, |
|
"loss": 0.377, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 8.007745756919923e-07, |
|
"loss": 0.5291, |
|
"step": 16330 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.950791661920493e-07, |
|
"loss": 0.3513, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.893837566921062e-07, |
|
"loss": 0.2492, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 7.836883471921632e-07, |
|
"loss": 0.4625, |
|
"step": 16360 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.779929376922202e-07, |
|
"loss": 0.2957, |
|
"step": 16370 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.72297528192277e-07, |
|
"loss": 0.3181, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.666021186923341e-07, |
|
"loss": 0.1754, |
|
"step": 16390 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 7.609067091923909e-07, |
|
"loss": 0.4674, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.55211299692448e-07, |
|
"loss": 0.3187, |
|
"step": 16410 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.49515890192505e-07, |
|
"loss": 0.5942, |
|
"step": 16420 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.438204806925618e-07, |
|
"loss": 0.3195, |
|
"step": 16430 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 7.381250711926189e-07, |
|
"loss": 0.5229, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 7.324296616926757e-07, |
|
"loss": 0.4654, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 7.267342521927327e-07, |
|
"loss": 0.4792, |
|
"step": 16460 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 7.210388426927898e-07, |
|
"loss": 0.2923, |
|
"step": 16470 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 7.153434331928466e-07, |
|
"loss": 0.2751, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 7.096480236929036e-07, |
|
"loss": 0.1897, |
|
"step": 16490 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 7.039526141929605e-07, |
|
"loss": 0.2381, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 0.3569630980491638, |
|
"eval_runtime": 212.316, |
|
"eval_samples_per_second": 9.519, |
|
"eval_steps_per_second": 2.383, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 6.982572046930175e-07, |
|
"loss": 0.4211, |
|
"step": 16510 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 6.925617951930743e-07, |
|
"loss": 0.2632, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 6.868663856931314e-07, |
|
"loss": 0.3168, |
|
"step": 16530 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 6.811709761931884e-07, |
|
"loss": 0.1798, |
|
"step": 16540 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.754755666932452e-07, |
|
"loss": 0.2793, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.697801571933023e-07, |
|
"loss": 0.2896, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.640847476933591e-07, |
|
"loss": 0.4018, |
|
"step": 16570 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 6.583893381934162e-07, |
|
"loss": 0.4488, |
|
"step": 16580 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 6.526939286934732e-07, |
|
"loss": 0.1678, |
|
"step": 16590 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 6.4699851919353e-07, |
|
"loss": 0.2788, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 6.413031096935871e-07, |
|
"loss": 0.2512, |
|
"step": 16610 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 6.356077001936439e-07, |
|
"loss": 0.297, |
|
"step": 16620 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 6.299122906937009e-07, |
|
"loss": 0.1882, |
|
"step": 16630 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 6.242168811937579e-07, |
|
"loss": 0.4612, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 6.185214716938148e-07, |
|
"loss": 0.3111, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.128260621938717e-07, |
|
"loss": 0.3121, |
|
"step": 16660 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.071306526939288e-07, |
|
"loss": 0.2923, |
|
"step": 16670 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 6.014352431939857e-07, |
|
"loss": 0.3701, |
|
"step": 16680 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 5.957398336940426e-07, |
|
"loss": 0.2431, |
|
"step": 16690 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 5.900444241940996e-07, |
|
"loss": 0.3444, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 5.843490146941565e-07, |
|
"loss": 0.2769, |
|
"step": 16710 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 5.786536051942134e-07, |
|
"loss": 0.1501, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 5.729581956942705e-07, |
|
"loss": 0.3458, |
|
"step": 16730 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 5.672627861943274e-07, |
|
"loss": 0.3527, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 5.615673766943844e-07, |
|
"loss": 0.3205, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 5.558719671944413e-07, |
|
"loss": 0.2322, |
|
"step": 16760 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 5.501765576944982e-07, |
|
"loss": 0.583, |
|
"step": 16770 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 5.444811481945552e-07, |
|
"loss": 0.3078, |
|
"step": 16780 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 5.387857386946122e-07, |
|
"loss": 0.2509, |
|
"step": 16790 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 5.330903291946691e-07, |
|
"loss": 0.2672, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 5.273949196947261e-07, |
|
"loss": 0.4086, |
|
"step": 16810 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 5.21699510194783e-07, |
|
"loss": 0.4899, |
|
"step": 16820 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 5.1600410069484e-07, |
|
"loss": 0.2831, |
|
"step": 16830 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 5.10308691194897e-07, |
|
"loss": 0.3935, |
|
"step": 16840 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 5.046132816949539e-07, |
|
"loss": 0.3414, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 4.989178721950108e-07, |
|
"loss": 0.2679, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.932224626950678e-07, |
|
"loss": 0.313, |
|
"step": 16870 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.875270531951248e-07, |
|
"loss": 0.3272, |
|
"step": 16880 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.818316436951817e-07, |
|
"loss": 0.321, |
|
"step": 16890 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.7613623419523863e-07, |
|
"loss": 0.4763, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 4.7044082469529567e-07, |
|
"loss": 0.2577, |
|
"step": 16910 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 4.647454151953526e-07, |
|
"loss": 0.2298, |
|
"step": 16920 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 4.5905000569540954e-07, |
|
"loss": 0.3451, |
|
"step": 16930 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 4.533545961954665e-07, |
|
"loss": 0.2389, |
|
"step": 16940 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 4.476591866955234e-07, |
|
"loss": 0.3936, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 4.4196377719558034e-07, |
|
"loss": 0.3509, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 4.362683676956374e-07, |
|
"loss": 0.2643, |
|
"step": 16970 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 4.305729581956943e-07, |
|
"loss": 0.541, |
|
"step": 16980 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 4.2487754869575125e-07, |
|
"loss": 0.1942, |
|
"step": 16990 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 4.191821391958082e-07, |
|
"loss": 0.3614, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_loss": 0.3553633391857147, |
|
"eval_runtime": 211.9344, |
|
"eval_samples_per_second": 9.536, |
|
"eval_steps_per_second": 2.388, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.1348672969586517e-07, |
|
"loss": 0.3707, |
|
"step": 17010 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.077913201959221e-07, |
|
"loss": 0.3282, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.020959106959791e-07, |
|
"loss": 0.51, |
|
"step": 17030 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 3.9640050119603603e-07, |
|
"loss": 0.1609, |
|
"step": 17040 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.9070509169609296e-07, |
|
"loss": 0.3832, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.8500968219614995e-07, |
|
"loss": 0.2903, |
|
"step": 17060 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.793142726962069e-07, |
|
"loss": 0.2686, |
|
"step": 17070 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 3.736188631962638e-07, |
|
"loss": 0.2917, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 3.679234536963208e-07, |
|
"loss": 0.265, |
|
"step": 17090 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 3.6222804419637774e-07, |
|
"loss": 0.3747, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 3.5653263469643473e-07, |
|
"loss": 0.5041, |
|
"step": 17110 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 3.5083722519649166e-07, |
|
"loss": 0.2131, |
|
"step": 17120 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 3.451418156965486e-07, |
|
"loss": 0.346, |
|
"step": 17130 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 3.3944640619660553e-07, |
|
"loss": 0.1843, |
|
"step": 17140 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.337509966966625e-07, |
|
"loss": 0.4283, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.280555871967195e-07, |
|
"loss": 0.4339, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.2236017769677644e-07, |
|
"loss": 0.2597, |
|
"step": 17170 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 3.1666476819683337e-07, |
|
"loss": 0.3388, |
|
"step": 17180 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.1096935869689036e-07, |
|
"loss": 0.4598, |
|
"step": 17190 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.052739491969473e-07, |
|
"loss": 0.3224, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.9957853969700423e-07, |
|
"loss": 0.3763, |
|
"step": 17210 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.938831301970612e-07, |
|
"loss": 0.3208, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.8818772069711815e-07, |
|
"loss": 0.1648, |
|
"step": 17230 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.824923111971751e-07, |
|
"loss": 0.2525, |
|
"step": 17240 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 2.7679690169723207e-07, |
|
"loss": 0.5624, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.71101492197289e-07, |
|
"loss": 0.4129, |
|
"step": 17260 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.6540608269734594e-07, |
|
"loss": 0.2936, |
|
"step": 17270 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.5971067319740293e-07, |
|
"loss": 0.1014, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.5401526369745986e-07, |
|
"loss": 0.5215, |
|
"step": 17290 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.483198541975168e-07, |
|
"loss": 0.3502, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.426244446975738e-07, |
|
"loss": 0.4451, |
|
"step": 17310 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.3692903519763072e-07, |
|
"loss": 0.3947, |
|
"step": 17320 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.3123362569768768e-07, |
|
"loss": 0.3667, |
|
"step": 17330 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.2553821619774464e-07, |
|
"loss": 0.3662, |
|
"step": 17340 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.1984280669780157e-07, |
|
"loss": 0.2084, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.1414739719785853e-07, |
|
"loss": 0.5334, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.084519876979155e-07, |
|
"loss": 0.3224, |
|
"step": 17370 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.0275657819797246e-07, |
|
"loss": 0.2063, |
|
"step": 17380 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.970611686980294e-07, |
|
"loss": 0.2368, |
|
"step": 17390 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.9136575919808638e-07, |
|
"loss": 0.3701, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.856703496981433e-07, |
|
"loss": 0.2256, |
|
"step": 17410 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.7997494019820025e-07, |
|
"loss": 0.2116, |
|
"step": 17420 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.7427953069825723e-07, |
|
"loss": 0.4188, |
|
"step": 17430 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.6858412119831417e-07, |
|
"loss": 0.3854, |
|
"step": 17440 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.628887116983711e-07, |
|
"loss": 0.2101, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.571933021984281e-07, |
|
"loss": 0.3551, |
|
"step": 17460 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.5149789269848502e-07, |
|
"loss": 0.3388, |
|
"step": 17470 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.4580248319854198e-07, |
|
"loss": 0.478, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.4010707369859895e-07, |
|
"loss": 0.1387, |
|
"step": 17490 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.3441166419865588e-07, |
|
"loss": 0.5615, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_loss": 0.35653403401374817, |
|
"eval_runtime": 212.1272, |
|
"eval_samples_per_second": 9.527, |
|
"eval_steps_per_second": 2.385, |
|
"step": 17500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 17735, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 666056317861888.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|