finetuned-PMCLLaMA-13B-MS2 / trainer_state.json
jiminHuang's picture
Upload folder using huggingface_hub
ed9ebfc verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.93374682830561,
"eval_steps": 500,
"global_step": 17500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.93752581844851e-06,
"loss": 3.0868,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 6.42386919416686e-06,
"loss": 3.9106,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 7.293324332157391e-06,
"loss": 3.2471,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 7.910212569885209e-06,
"loss": 2.2106,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 8.388708261178669e-06,
"loss": 1.3444,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 8.77966770787574e-06,
"loss": 1.1874,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 9.110219210139135e-06,
"loss": 0.7707,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 9.396555945603558e-06,
"loss": 1.07,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 9.649122845866272e-06,
"loss": 0.8138,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 9.87505163689702e-06,
"loss": 0.7951,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 9.997152349311819e-06,
"loss": 1.8935,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 9.98766018035121e-06,
"loss": 1.1202,
"step": 120
},
{
"epoch": 0.04,
"learning_rate": 9.978168011390604e-06,
"loss": 0.8436,
"step": 130
},
{
"epoch": 0.04,
"learning_rate": 9.968675842429996e-06,
"loss": 1.6259,
"step": 140
},
{
"epoch": 0.04,
"learning_rate": 9.959183673469387e-06,
"loss": 0.688,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 9.94969150450878e-06,
"loss": 1.2987,
"step": 160
},
{
"epoch": 0.05,
"learning_rate": 9.940199335548174e-06,
"loss": 1.1667,
"step": 170
},
{
"epoch": 0.05,
"learning_rate": 9.930707166587566e-06,
"loss": 0.9948,
"step": 180
},
{
"epoch": 0.05,
"learning_rate": 9.92121499762696e-06,
"loss": 1.1458,
"step": 190
},
{
"epoch": 0.06,
"learning_rate": 9.911722828666351e-06,
"loss": 1.2454,
"step": 200
},
{
"epoch": 0.06,
"learning_rate": 9.902230659705745e-06,
"loss": 0.8821,
"step": 210
},
{
"epoch": 0.06,
"learning_rate": 9.892738490745136e-06,
"loss": 0.7736,
"step": 220
},
{
"epoch": 0.06,
"learning_rate": 9.883246321784528e-06,
"loss": 1.0794,
"step": 230
},
{
"epoch": 0.07,
"learning_rate": 9.873754152823922e-06,
"loss": 1.4458,
"step": 240
},
{
"epoch": 0.07,
"learning_rate": 9.864261983863313e-06,
"loss": 0.9957,
"step": 250
},
{
"epoch": 0.07,
"learning_rate": 9.854769814902707e-06,
"loss": 0.6101,
"step": 260
},
{
"epoch": 0.08,
"learning_rate": 9.845277645942099e-06,
"loss": 0.6848,
"step": 270
},
{
"epoch": 0.08,
"learning_rate": 9.83578547698149e-06,
"loss": 1.6407,
"step": 280
},
{
"epoch": 0.08,
"learning_rate": 9.826293308020884e-06,
"loss": 0.8201,
"step": 290
},
{
"epoch": 0.08,
"learning_rate": 9.816801139060275e-06,
"loss": 0.9695,
"step": 300
},
{
"epoch": 0.09,
"learning_rate": 9.807308970099669e-06,
"loss": 1.131,
"step": 310
},
{
"epoch": 0.09,
"learning_rate": 9.79781680113906e-06,
"loss": 0.8368,
"step": 320
},
{
"epoch": 0.09,
"learning_rate": 9.788324632178452e-06,
"loss": 1.0931,
"step": 330
},
{
"epoch": 0.1,
"learning_rate": 9.778832463217846e-06,
"loss": 0.617,
"step": 340
},
{
"epoch": 0.1,
"learning_rate": 9.76934029425724e-06,
"loss": 0.3638,
"step": 350
},
{
"epoch": 0.1,
"learning_rate": 9.759848125296631e-06,
"loss": 1.0894,
"step": 360
},
{
"epoch": 0.1,
"learning_rate": 9.750355956336024e-06,
"loss": 1.2748,
"step": 370
},
{
"epoch": 0.11,
"learning_rate": 9.740863787375416e-06,
"loss": 1.4306,
"step": 380
},
{
"epoch": 0.11,
"learning_rate": 9.731371618414808e-06,
"loss": 0.9422,
"step": 390
},
{
"epoch": 0.11,
"learning_rate": 9.721879449454201e-06,
"loss": 0.6539,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 9.712387280493593e-06,
"loss": 0.862,
"step": 410
},
{
"epoch": 0.12,
"learning_rate": 9.702895111532987e-06,
"loss": 1.2179,
"step": 420
},
{
"epoch": 0.12,
"learning_rate": 9.693402942572378e-06,
"loss": 0.9396,
"step": 430
},
{
"epoch": 0.12,
"learning_rate": 9.683910773611772e-06,
"loss": 0.897,
"step": 440
},
{
"epoch": 0.13,
"learning_rate": 9.674418604651164e-06,
"loss": 0.8183,
"step": 450
},
{
"epoch": 0.13,
"learning_rate": 9.664926435690555e-06,
"loss": 0.5799,
"step": 460
},
{
"epoch": 0.13,
"learning_rate": 9.655434266729949e-06,
"loss": 0.6694,
"step": 470
},
{
"epoch": 0.14,
"learning_rate": 9.64594209776934e-06,
"loss": 0.6407,
"step": 480
},
{
"epoch": 0.14,
"learning_rate": 9.636449928808734e-06,
"loss": 0.4138,
"step": 490
},
{
"epoch": 0.14,
"learning_rate": 9.626957759848126e-06,
"loss": 1.255,
"step": 500
},
{
"epoch": 0.14,
"eval_loss": 0.5974699854850769,
"eval_runtime": 216.3078,
"eval_samples_per_second": 9.343,
"eval_steps_per_second": 2.339,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 9.617465590887517e-06,
"loss": 1.0736,
"step": 510
},
{
"epoch": 0.15,
"learning_rate": 9.607973421926911e-06,
"loss": 0.7988,
"step": 520
},
{
"epoch": 0.15,
"learning_rate": 9.598481252966304e-06,
"loss": 0.7407,
"step": 530
},
{
"epoch": 0.15,
"learning_rate": 9.588989084005696e-06,
"loss": 0.6603,
"step": 540
},
{
"epoch": 0.16,
"learning_rate": 9.57949691504509e-06,
"loss": 0.5824,
"step": 550
},
{
"epoch": 0.16,
"learning_rate": 9.570004746084481e-06,
"loss": 0.8746,
"step": 560
},
{
"epoch": 0.16,
"learning_rate": 9.560512577123873e-06,
"loss": 0.3868,
"step": 570
},
{
"epoch": 0.16,
"learning_rate": 9.551020408163266e-06,
"loss": 0.5346,
"step": 580
},
{
"epoch": 0.17,
"learning_rate": 9.541528239202658e-06,
"loss": 0.559,
"step": 590
},
{
"epoch": 0.17,
"learning_rate": 9.532036070242052e-06,
"loss": 0.8601,
"step": 600
},
{
"epoch": 0.17,
"learning_rate": 9.522543901281443e-06,
"loss": 0.5392,
"step": 610
},
{
"epoch": 0.17,
"learning_rate": 9.513051732320835e-06,
"loss": 0.6593,
"step": 620
},
{
"epoch": 0.18,
"learning_rate": 9.503559563360229e-06,
"loss": 0.8227,
"step": 630
},
{
"epoch": 0.18,
"learning_rate": 9.49406739439962e-06,
"loss": 0.6741,
"step": 640
},
{
"epoch": 0.18,
"learning_rate": 9.484575225439014e-06,
"loss": 0.3784,
"step": 650
},
{
"epoch": 0.19,
"learning_rate": 9.475083056478406e-06,
"loss": 0.4041,
"step": 660
},
{
"epoch": 0.19,
"learning_rate": 9.465590887517799e-06,
"loss": 0.8986,
"step": 670
},
{
"epoch": 0.19,
"learning_rate": 9.45609871855719e-06,
"loss": 0.6402,
"step": 680
},
{
"epoch": 0.19,
"learning_rate": 9.446606549596583e-06,
"loss": 0.5837,
"step": 690
},
{
"epoch": 0.2,
"learning_rate": 9.437114380635976e-06,
"loss": 0.7486,
"step": 700
},
{
"epoch": 0.2,
"learning_rate": 9.42762221167537e-06,
"loss": 0.7009,
"step": 710
},
{
"epoch": 0.2,
"learning_rate": 9.418130042714761e-06,
"loss": 0.4641,
"step": 720
},
{
"epoch": 0.21,
"learning_rate": 9.408637873754155e-06,
"loss": 0.0882,
"step": 730
},
{
"epoch": 0.21,
"learning_rate": 9.399145704793546e-06,
"loss": 0.4681,
"step": 740
},
{
"epoch": 0.21,
"learning_rate": 9.389653535832938e-06,
"loss": 0.5282,
"step": 750
},
{
"epoch": 0.21,
"learning_rate": 9.380161366872332e-06,
"loss": 0.7416,
"step": 760
},
{
"epoch": 0.22,
"learning_rate": 9.370669197911723e-06,
"loss": 0.6423,
"step": 770
},
{
"epoch": 0.22,
"learning_rate": 9.361177028951117e-06,
"loss": 0.6967,
"step": 780
},
{
"epoch": 0.22,
"learning_rate": 9.351684859990508e-06,
"loss": 0.4016,
"step": 790
},
{
"epoch": 0.23,
"learning_rate": 9.3421926910299e-06,
"loss": 0.698,
"step": 800
},
{
"epoch": 0.23,
"learning_rate": 9.332700522069294e-06,
"loss": 0.4438,
"step": 810
},
{
"epoch": 0.23,
"learning_rate": 9.323208353108685e-06,
"loss": 0.5083,
"step": 820
},
{
"epoch": 0.23,
"learning_rate": 9.313716184148079e-06,
"loss": 0.8066,
"step": 830
},
{
"epoch": 0.24,
"learning_rate": 9.30422401518747e-06,
"loss": 0.5104,
"step": 840
},
{
"epoch": 0.24,
"learning_rate": 9.294731846226862e-06,
"loss": 0.383,
"step": 850
},
{
"epoch": 0.24,
"learning_rate": 9.285239677266256e-06,
"loss": 0.4484,
"step": 860
},
{
"epoch": 0.25,
"learning_rate": 9.275747508305648e-06,
"loss": 0.5572,
"step": 870
},
{
"epoch": 0.25,
"learning_rate": 9.266255339345041e-06,
"loss": 0.6063,
"step": 880
},
{
"epoch": 0.25,
"learning_rate": 9.256763170384434e-06,
"loss": 0.6665,
"step": 890
},
{
"epoch": 0.25,
"learning_rate": 9.247271001423826e-06,
"loss": 0.6123,
"step": 900
},
{
"epoch": 0.26,
"learning_rate": 9.23777883246322e-06,
"loss": 0.5005,
"step": 910
},
{
"epoch": 0.26,
"learning_rate": 9.228286663502611e-06,
"loss": 0.9236,
"step": 920
},
{
"epoch": 0.26,
"learning_rate": 9.218794494542003e-06,
"loss": 0.4072,
"step": 930
},
{
"epoch": 0.27,
"learning_rate": 9.209302325581397e-06,
"loss": 0.6445,
"step": 940
},
{
"epoch": 0.27,
"learning_rate": 9.199810156620788e-06,
"loss": 0.9185,
"step": 950
},
{
"epoch": 0.27,
"learning_rate": 9.190317987660182e-06,
"loss": 0.261,
"step": 960
},
{
"epoch": 0.27,
"learning_rate": 9.180825818699574e-06,
"loss": 0.7811,
"step": 970
},
{
"epoch": 0.28,
"learning_rate": 9.171333649738965e-06,
"loss": 0.7295,
"step": 980
},
{
"epoch": 0.28,
"learning_rate": 9.161841480778359e-06,
"loss": 0.3809,
"step": 990
},
{
"epoch": 0.28,
"learning_rate": 9.15234931181775e-06,
"loss": 0.6035,
"step": 1000
},
{
"epoch": 0.28,
"eval_loss": 0.48104238510131836,
"eval_runtime": 210.5024,
"eval_samples_per_second": 9.601,
"eval_steps_per_second": 2.404,
"step": 1000
},
{
"epoch": 0.28,
"learning_rate": 9.142857142857144e-06,
"loss": 0.9329,
"step": 1010
},
{
"epoch": 0.29,
"learning_rate": 9.133364973896536e-06,
"loss": 0.65,
"step": 1020
},
{
"epoch": 0.29,
"learning_rate": 9.123872804935927e-06,
"loss": 0.3792,
"step": 1030
},
{
"epoch": 0.29,
"learning_rate": 9.11438063597532e-06,
"loss": 0.1691,
"step": 1040
},
{
"epoch": 0.3,
"learning_rate": 9.104888467014713e-06,
"loss": 0.6827,
"step": 1050
},
{
"epoch": 0.3,
"learning_rate": 9.095396298054106e-06,
"loss": 0.6459,
"step": 1060
},
{
"epoch": 0.3,
"learning_rate": 9.0859041290935e-06,
"loss": 0.8112,
"step": 1070
},
{
"epoch": 0.3,
"learning_rate": 9.076411960132891e-06,
"loss": 0.6337,
"step": 1080
},
{
"epoch": 0.31,
"learning_rate": 9.066919791172285e-06,
"loss": 0.4256,
"step": 1090
},
{
"epoch": 0.31,
"learning_rate": 9.057427622211676e-06,
"loss": 0.2803,
"step": 1100
},
{
"epoch": 0.31,
"learning_rate": 9.047935453251068e-06,
"loss": 0.5562,
"step": 1110
},
{
"epoch": 0.32,
"learning_rate": 9.038443284290462e-06,
"loss": 0.3124,
"step": 1120
},
{
"epoch": 0.32,
"learning_rate": 9.028951115329853e-06,
"loss": 0.3223,
"step": 1130
},
{
"epoch": 0.32,
"learning_rate": 9.019458946369247e-06,
"loss": 0.5686,
"step": 1140
},
{
"epoch": 0.32,
"learning_rate": 9.009966777408639e-06,
"loss": 0.2857,
"step": 1150
},
{
"epoch": 0.33,
"learning_rate": 9.00047460844803e-06,
"loss": 0.4192,
"step": 1160
},
{
"epoch": 0.33,
"learning_rate": 8.990982439487424e-06,
"loss": 0.2262,
"step": 1170
},
{
"epoch": 0.33,
"learning_rate": 8.981490270526815e-06,
"loss": 0.6804,
"step": 1180
},
{
"epoch": 0.34,
"learning_rate": 8.971998101566209e-06,
"loss": 0.3721,
"step": 1190
},
{
"epoch": 0.34,
"learning_rate": 8.9625059326056e-06,
"loss": 0.47,
"step": 1200
},
{
"epoch": 0.34,
"learning_rate": 8.953013763644992e-06,
"loss": 0.7316,
"step": 1210
},
{
"epoch": 0.34,
"learning_rate": 8.943521594684386e-06,
"loss": 0.7199,
"step": 1220
},
{
"epoch": 0.35,
"learning_rate": 8.934029425723778e-06,
"loss": 0.6315,
"step": 1230
},
{
"epoch": 0.35,
"learning_rate": 8.924537256763171e-06,
"loss": 0.4095,
"step": 1240
},
{
"epoch": 0.35,
"learning_rate": 8.915045087802565e-06,
"loss": 1.0135,
"step": 1250
},
{
"epoch": 0.36,
"learning_rate": 8.905552918841956e-06,
"loss": 0.6973,
"step": 1260
},
{
"epoch": 0.36,
"learning_rate": 8.89606074988135e-06,
"loss": 0.3849,
"step": 1270
},
{
"epoch": 0.36,
"learning_rate": 8.886568580920741e-06,
"loss": 0.4011,
"step": 1280
},
{
"epoch": 0.36,
"learning_rate": 8.877076411960133e-06,
"loss": 0.2936,
"step": 1290
},
{
"epoch": 0.37,
"learning_rate": 8.867584242999527e-06,
"loss": 0.5101,
"step": 1300
},
{
"epoch": 0.37,
"learning_rate": 8.858092074038918e-06,
"loss": 0.4414,
"step": 1310
},
{
"epoch": 0.37,
"learning_rate": 8.848599905078312e-06,
"loss": 0.5243,
"step": 1320
},
{
"epoch": 0.37,
"learning_rate": 8.839107736117704e-06,
"loss": 0.3593,
"step": 1330
},
{
"epoch": 0.38,
"learning_rate": 8.829615567157095e-06,
"loss": 1.0084,
"step": 1340
},
{
"epoch": 0.38,
"learning_rate": 8.820123398196489e-06,
"loss": 0.612,
"step": 1350
},
{
"epoch": 0.38,
"learning_rate": 8.81063122923588e-06,
"loss": 0.6974,
"step": 1360
},
{
"epoch": 0.39,
"learning_rate": 8.801139060275274e-06,
"loss": 0.0845,
"step": 1370
},
{
"epoch": 0.39,
"learning_rate": 8.791646891314666e-06,
"loss": 0.4725,
"step": 1380
},
{
"epoch": 0.39,
"learning_rate": 8.782154722354057e-06,
"loss": 0.5528,
"step": 1390
},
{
"epoch": 0.39,
"learning_rate": 8.772662553393451e-06,
"loss": 0.4501,
"step": 1400
},
{
"epoch": 0.4,
"learning_rate": 8.763170384432843e-06,
"loss": 0.5435,
"step": 1410
},
{
"epoch": 0.4,
"learning_rate": 8.753678215472236e-06,
"loss": 0.3251,
"step": 1420
},
{
"epoch": 0.4,
"learning_rate": 8.74418604651163e-06,
"loss": 0.5594,
"step": 1430
},
{
"epoch": 0.41,
"learning_rate": 8.734693877551021e-06,
"loss": 0.7562,
"step": 1440
},
{
"epoch": 0.41,
"learning_rate": 8.725201708590415e-06,
"loss": 0.463,
"step": 1450
},
{
"epoch": 0.41,
"learning_rate": 8.715709539629807e-06,
"loss": 0.5296,
"step": 1460
},
{
"epoch": 0.41,
"learning_rate": 8.706217370669198e-06,
"loss": 0.6942,
"step": 1470
},
{
"epoch": 0.42,
"learning_rate": 8.696725201708592e-06,
"loss": 0.6386,
"step": 1480
},
{
"epoch": 0.42,
"learning_rate": 8.687233032747983e-06,
"loss": 0.4996,
"step": 1490
},
{
"epoch": 0.42,
"learning_rate": 8.677740863787377e-06,
"loss": 0.2947,
"step": 1500
},
{
"epoch": 0.42,
"eval_loss": 0.43847087025642395,
"eval_runtime": 211.0507,
"eval_samples_per_second": 9.576,
"eval_steps_per_second": 2.398,
"step": 1500
},
{
"epoch": 0.43,
"learning_rate": 8.668248694826769e-06,
"loss": 0.5069,
"step": 1510
},
{
"epoch": 0.43,
"learning_rate": 8.65875652586616e-06,
"loss": 0.5187,
"step": 1520
},
{
"epoch": 0.43,
"learning_rate": 8.649264356905554e-06,
"loss": 0.7606,
"step": 1530
},
{
"epoch": 0.43,
"learning_rate": 8.639772187944946e-06,
"loss": 0.3025,
"step": 1540
},
{
"epoch": 0.44,
"learning_rate": 8.630280018984339e-06,
"loss": 0.5177,
"step": 1550
},
{
"epoch": 0.44,
"learning_rate": 8.62078785002373e-06,
"loss": 0.7777,
"step": 1560
},
{
"epoch": 0.44,
"learning_rate": 8.611295681063123e-06,
"loss": 0.4197,
"step": 1570
},
{
"epoch": 0.45,
"learning_rate": 8.601803512102516e-06,
"loss": 0.2935,
"step": 1580
},
{
"epoch": 0.45,
"learning_rate": 8.59231134314191e-06,
"loss": 0.3054,
"step": 1590
},
{
"epoch": 0.45,
"learning_rate": 8.582819174181301e-06,
"loss": 0.3883,
"step": 1600
},
{
"epoch": 0.45,
"learning_rate": 8.573327005220695e-06,
"loss": 0.5324,
"step": 1610
},
{
"epoch": 0.46,
"learning_rate": 8.563834836260086e-06,
"loss": 0.5354,
"step": 1620
},
{
"epoch": 0.46,
"learning_rate": 8.554342667299478e-06,
"loss": 0.6134,
"step": 1630
},
{
"epoch": 0.46,
"learning_rate": 8.544850498338872e-06,
"loss": 0.289,
"step": 1640
},
{
"epoch": 0.47,
"learning_rate": 8.535358329378263e-06,
"loss": 0.2473,
"step": 1650
},
{
"epoch": 0.47,
"learning_rate": 8.525866160417657e-06,
"loss": 0.3148,
"step": 1660
},
{
"epoch": 0.47,
"learning_rate": 8.516373991457048e-06,
"loss": 0.5027,
"step": 1670
},
{
"epoch": 0.47,
"learning_rate": 8.506881822496442e-06,
"loss": 0.24,
"step": 1680
},
{
"epoch": 0.48,
"learning_rate": 8.497389653535834e-06,
"loss": 0.5346,
"step": 1690
},
{
"epoch": 0.48,
"learning_rate": 8.487897484575225e-06,
"loss": 0.5567,
"step": 1700
},
{
"epoch": 0.48,
"learning_rate": 8.478405315614619e-06,
"loss": 0.3816,
"step": 1710
},
{
"epoch": 0.48,
"learning_rate": 8.46891314665401e-06,
"loss": 0.499,
"step": 1720
},
{
"epoch": 0.49,
"learning_rate": 8.459420977693404e-06,
"loss": 0.6085,
"step": 1730
},
{
"epoch": 0.49,
"learning_rate": 8.449928808732796e-06,
"loss": 0.5301,
"step": 1740
},
{
"epoch": 0.49,
"learning_rate": 8.440436639772188e-06,
"loss": 0.5552,
"step": 1750
},
{
"epoch": 0.5,
"learning_rate": 8.430944470811581e-06,
"loss": 0.3411,
"step": 1760
},
{
"epoch": 0.5,
"learning_rate": 8.421452301850974e-06,
"loss": 0.9363,
"step": 1770
},
{
"epoch": 0.5,
"learning_rate": 8.411960132890366e-06,
"loss": 0.6064,
"step": 1780
},
{
"epoch": 0.5,
"learning_rate": 8.40246796392976e-06,
"loss": 0.5264,
"step": 1790
},
{
"epoch": 0.51,
"learning_rate": 8.392975794969151e-06,
"loss": 0.4054,
"step": 1800
},
{
"epoch": 0.51,
"learning_rate": 8.383483626008543e-06,
"loss": 0.4287,
"step": 1810
},
{
"epoch": 0.51,
"learning_rate": 8.373991457047937e-06,
"loss": 0.1082,
"step": 1820
},
{
"epoch": 0.52,
"learning_rate": 8.364499288087328e-06,
"loss": 0.6657,
"step": 1830
},
{
"epoch": 0.52,
"learning_rate": 8.355007119126722e-06,
"loss": 0.5962,
"step": 1840
},
{
"epoch": 0.52,
"learning_rate": 8.345514950166114e-06,
"loss": 0.4596,
"step": 1850
},
{
"epoch": 0.52,
"learning_rate": 8.336022781205505e-06,
"loss": 0.3728,
"step": 1860
},
{
"epoch": 0.53,
"learning_rate": 8.326530612244899e-06,
"loss": 0.5586,
"step": 1870
},
{
"epoch": 0.53,
"learning_rate": 8.31703844328429e-06,
"loss": 0.4651,
"step": 1880
},
{
"epoch": 0.53,
"learning_rate": 8.307546274323684e-06,
"loss": 0.5714,
"step": 1890
},
{
"epoch": 0.54,
"learning_rate": 8.298054105363076e-06,
"loss": 0.525,
"step": 1900
},
{
"epoch": 0.54,
"learning_rate": 8.288561936402469e-06,
"loss": 0.3305,
"step": 1910
},
{
"epoch": 0.54,
"learning_rate": 8.279069767441861e-06,
"loss": 0.5429,
"step": 1920
},
{
"epoch": 0.54,
"learning_rate": 8.269577598481253e-06,
"loss": 0.5503,
"step": 1930
},
{
"epoch": 0.55,
"learning_rate": 8.260085429520646e-06,
"loss": 0.303,
"step": 1940
},
{
"epoch": 0.55,
"learning_rate": 8.25059326056004e-06,
"loss": 0.4068,
"step": 1950
},
{
"epoch": 0.55,
"learning_rate": 8.241101091599431e-06,
"loss": 0.1963,
"step": 1960
},
{
"epoch": 0.56,
"learning_rate": 8.231608922638825e-06,
"loss": 0.2071,
"step": 1970
},
{
"epoch": 0.56,
"learning_rate": 8.222116753678216e-06,
"loss": 0.3768,
"step": 1980
},
{
"epoch": 0.56,
"learning_rate": 8.212624584717608e-06,
"loss": 0.3167,
"step": 1990
},
{
"epoch": 0.56,
"learning_rate": 8.203132415757002e-06,
"loss": 0.5134,
"step": 2000
},
{
"epoch": 0.56,
"eval_loss": 0.4093641936779022,
"eval_runtime": 211.8311,
"eval_samples_per_second": 9.541,
"eval_steps_per_second": 2.389,
"step": 2000
},
{
"epoch": 0.57,
"learning_rate": 8.193640246796393e-06,
"loss": 0.5266,
"step": 2010
},
{
"epoch": 0.57,
"learning_rate": 8.184148077835787e-06,
"loss": 0.3986,
"step": 2020
},
{
"epoch": 0.57,
"learning_rate": 8.174655908875179e-06,
"loss": 0.4266,
"step": 2030
},
{
"epoch": 0.58,
"learning_rate": 8.16516373991457e-06,
"loss": 0.3662,
"step": 2040
},
{
"epoch": 0.58,
"learning_rate": 8.155671570953964e-06,
"loss": 0.6357,
"step": 2050
},
{
"epoch": 0.58,
"learning_rate": 8.146179401993356e-06,
"loss": 0.6372,
"step": 2060
},
{
"epoch": 0.58,
"learning_rate": 8.136687233032749e-06,
"loss": 0.5443,
"step": 2070
},
{
"epoch": 0.59,
"learning_rate": 8.12719506407214e-06,
"loss": 0.2663,
"step": 2080
},
{
"epoch": 0.59,
"learning_rate": 8.117702895111534e-06,
"loss": 0.5385,
"step": 2090
},
{
"epoch": 0.59,
"learning_rate": 8.108210726150926e-06,
"loss": 0.3451,
"step": 2100
},
{
"epoch": 0.59,
"learning_rate": 8.098718557190318e-06,
"loss": 0.5365,
"step": 2110
},
{
"epoch": 0.6,
"learning_rate": 8.089226388229711e-06,
"loss": 0.4255,
"step": 2120
},
{
"epoch": 0.6,
"learning_rate": 8.079734219269105e-06,
"loss": 0.4673,
"step": 2130
},
{
"epoch": 0.6,
"learning_rate": 8.070242050308496e-06,
"loss": 0.7596,
"step": 2140
},
{
"epoch": 0.61,
"learning_rate": 8.06074988134789e-06,
"loss": 0.2713,
"step": 2150
},
{
"epoch": 0.61,
"learning_rate": 8.051257712387281e-06,
"loss": 0.3279,
"step": 2160
},
{
"epoch": 0.61,
"learning_rate": 8.041765543426673e-06,
"loss": 0.7999,
"step": 2170
},
{
"epoch": 0.61,
"learning_rate": 8.032273374466067e-06,
"loss": 0.4163,
"step": 2180
},
{
"epoch": 0.62,
"learning_rate": 8.022781205505458e-06,
"loss": 0.2294,
"step": 2190
},
{
"epoch": 0.62,
"learning_rate": 8.013289036544852e-06,
"loss": 0.6922,
"step": 2200
},
{
"epoch": 0.62,
"learning_rate": 8.003796867584244e-06,
"loss": 0.656,
"step": 2210
},
{
"epoch": 0.63,
"learning_rate": 7.994304698623635e-06,
"loss": 0.407,
"step": 2220
},
{
"epoch": 0.63,
"learning_rate": 7.984812529663029e-06,
"loss": 0.4617,
"step": 2230
},
{
"epoch": 0.63,
"learning_rate": 7.97532036070242e-06,
"loss": 0.4542,
"step": 2240
},
{
"epoch": 0.63,
"learning_rate": 7.965828191741814e-06,
"loss": 0.5353,
"step": 2250
},
{
"epoch": 0.64,
"learning_rate": 7.956336022781206e-06,
"loss": 0.3015,
"step": 2260
},
{
"epoch": 0.64,
"learning_rate": 7.946843853820598e-06,
"loss": 0.5876,
"step": 2270
},
{
"epoch": 0.64,
"learning_rate": 7.937351684859991e-06,
"loss": 0.44,
"step": 2280
},
{
"epoch": 0.65,
"learning_rate": 7.927859515899383e-06,
"loss": 0.3684,
"step": 2290
},
{
"epoch": 0.65,
"learning_rate": 7.918367346938776e-06,
"loss": 0.3488,
"step": 2300
},
{
"epoch": 0.65,
"learning_rate": 7.90887517797817e-06,
"loss": 0.4936,
"step": 2310
},
{
"epoch": 0.65,
"learning_rate": 7.899383009017561e-06,
"loss": 0.4513,
"step": 2320
},
{
"epoch": 0.66,
"learning_rate": 7.889890840056955e-06,
"loss": 0.4719,
"step": 2330
},
{
"epoch": 0.66,
"learning_rate": 7.880398671096347e-06,
"loss": 0.4911,
"step": 2340
},
{
"epoch": 0.66,
"learning_rate": 7.870906502135738e-06,
"loss": 0.5195,
"step": 2350
},
{
"epoch": 0.67,
"learning_rate": 7.861414333175132e-06,
"loss": 0.2038,
"step": 2360
},
{
"epoch": 0.67,
"learning_rate": 7.851922164214523e-06,
"loss": 0.3553,
"step": 2370
},
{
"epoch": 0.67,
"learning_rate": 7.842429995253917e-06,
"loss": 0.5719,
"step": 2380
},
{
"epoch": 0.67,
"learning_rate": 7.832937826293309e-06,
"loss": 0.7717,
"step": 2390
},
{
"epoch": 0.68,
"learning_rate": 7.8234456573327e-06,
"loss": 0.1035,
"step": 2400
},
{
"epoch": 0.68,
"learning_rate": 7.813953488372094e-06,
"loss": 0.5225,
"step": 2410
},
{
"epoch": 0.68,
"learning_rate": 7.804461319411486e-06,
"loss": 0.4826,
"step": 2420
},
{
"epoch": 0.69,
"learning_rate": 7.794969150450879e-06,
"loss": 0.573,
"step": 2430
},
{
"epoch": 0.69,
"learning_rate": 7.78547698149027e-06,
"loss": 0.2182,
"step": 2440
},
{
"epoch": 0.69,
"learning_rate": 7.775984812529663e-06,
"loss": 0.6481,
"step": 2450
},
{
"epoch": 0.69,
"learning_rate": 7.766492643569056e-06,
"loss": 0.3904,
"step": 2460
},
{
"epoch": 0.7,
"learning_rate": 7.757000474608448e-06,
"loss": 0.7893,
"step": 2470
},
{
"epoch": 0.7,
"learning_rate": 7.747508305647841e-06,
"loss": 0.4534,
"step": 2480
},
{
"epoch": 0.7,
"learning_rate": 7.738016136687235e-06,
"loss": 0.6228,
"step": 2490
},
{
"epoch": 0.7,
"learning_rate": 7.728523967726626e-06,
"loss": 0.345,
"step": 2500
},
{
"epoch": 0.7,
"eval_loss": 0.3839055895805359,
"eval_runtime": 210.4029,
"eval_samples_per_second": 9.605,
"eval_steps_per_second": 2.405,
"step": 2500
},
{
"epoch": 0.71,
"learning_rate": 7.71903179876602e-06,
"loss": 0.5411,
"step": 2510
},
{
"epoch": 0.71,
"learning_rate": 7.709539629805412e-06,
"loss": 0.5758,
"step": 2520
},
{
"epoch": 0.71,
"learning_rate": 7.700047460844803e-06,
"loss": 0.3009,
"step": 2530
},
{
"epoch": 0.72,
"learning_rate": 7.690555291884197e-06,
"loss": 0.5573,
"step": 2540
},
{
"epoch": 0.72,
"learning_rate": 7.681063122923589e-06,
"loss": 0.627,
"step": 2550
},
{
"epoch": 0.72,
"learning_rate": 7.671570953962982e-06,
"loss": 0.5643,
"step": 2560
},
{
"epoch": 0.72,
"learning_rate": 7.662078785002374e-06,
"loss": 0.6044,
"step": 2570
},
{
"epoch": 0.73,
"learning_rate": 7.652586616041765e-06,
"loss": 0.3706,
"step": 2580
},
{
"epoch": 0.73,
"learning_rate": 7.643094447081159e-06,
"loss": 0.4161,
"step": 2590
},
{
"epoch": 0.73,
"learning_rate": 7.63360227812055e-06,
"loss": 0.4195,
"step": 2600
},
{
"epoch": 0.74,
"learning_rate": 7.624110109159943e-06,
"loss": 0.619,
"step": 2610
},
{
"epoch": 0.74,
"learning_rate": 7.614617940199336e-06,
"loss": 0.5089,
"step": 2620
},
{
"epoch": 0.74,
"learning_rate": 7.6051257712387284e-06,
"loss": 0.288,
"step": 2630
},
{
"epoch": 0.74,
"learning_rate": 7.595633602278121e-06,
"loss": 0.3765,
"step": 2640
},
{
"epoch": 0.75,
"learning_rate": 7.586141433317513e-06,
"loss": 0.6771,
"step": 2650
},
{
"epoch": 0.75,
"learning_rate": 7.576649264356905e-06,
"loss": 0.3748,
"step": 2660
},
{
"epoch": 0.75,
"learning_rate": 7.567157095396299e-06,
"loss": 0.5745,
"step": 2670
},
{
"epoch": 0.76,
"learning_rate": 7.557664926435691e-06,
"loss": 0.6296,
"step": 2680
},
{
"epoch": 0.76,
"learning_rate": 7.548172757475084e-06,
"loss": 0.3589,
"step": 2690
},
{
"epoch": 0.76,
"learning_rate": 7.538680588514477e-06,
"loss": 0.6862,
"step": 2700
},
{
"epoch": 0.76,
"learning_rate": 7.529188419553869e-06,
"loss": 0.5385,
"step": 2710
},
{
"epoch": 0.77,
"learning_rate": 7.519696250593261e-06,
"loss": 0.2568,
"step": 2720
},
{
"epoch": 0.77,
"learning_rate": 7.5102040816326536e-06,
"loss": 0.3519,
"step": 2730
},
{
"epoch": 0.77,
"learning_rate": 7.500711912672046e-06,
"loss": 0.3583,
"step": 2740
},
{
"epoch": 0.78,
"learning_rate": 7.491219743711439e-06,
"loss": 0.4923,
"step": 2750
},
{
"epoch": 0.78,
"learning_rate": 7.481727574750831e-06,
"loss": 0.3252,
"step": 2760
},
{
"epoch": 0.78,
"learning_rate": 7.472235405790224e-06,
"loss": 0.4134,
"step": 2770
},
{
"epoch": 0.78,
"learning_rate": 7.462743236829616e-06,
"loss": 0.4374,
"step": 2780
},
{
"epoch": 0.79,
"learning_rate": 7.453251067869008e-06,
"loss": 0.2847,
"step": 2790
},
{
"epoch": 0.79,
"learning_rate": 7.443758898908401e-06,
"loss": 0.3431,
"step": 2800
},
{
"epoch": 0.79,
"learning_rate": 7.4342667299477935e-06,
"loss": 0.3763,
"step": 2810
},
{
"epoch": 0.8,
"learning_rate": 7.424774560987186e-06,
"loss": 0.4108,
"step": 2820
},
{
"epoch": 0.8,
"learning_rate": 7.415282392026578e-06,
"loss": 0.2789,
"step": 2830
},
{
"epoch": 0.8,
"learning_rate": 7.405790223065972e-06,
"loss": 0.2425,
"step": 2840
},
{
"epoch": 0.8,
"learning_rate": 7.396298054105364e-06,
"loss": 0.3791,
"step": 2850
},
{
"epoch": 0.81,
"learning_rate": 7.3868058851447565e-06,
"loss": 0.5064,
"step": 2860
},
{
"epoch": 0.81,
"learning_rate": 7.377313716184149e-06,
"loss": 0.3999,
"step": 2870
},
{
"epoch": 0.81,
"learning_rate": 7.367821547223542e-06,
"loss": 0.4088,
"step": 2880
},
{
"epoch": 0.81,
"learning_rate": 7.358329378262934e-06,
"loss": 0.3158,
"step": 2890
},
{
"epoch": 0.82,
"learning_rate": 7.348837209302326e-06,
"loss": 0.3839,
"step": 2900
},
{
"epoch": 0.82,
"learning_rate": 7.339345040341719e-06,
"loss": 0.7998,
"step": 2910
},
{
"epoch": 0.82,
"learning_rate": 7.329852871381111e-06,
"loss": 0.4684,
"step": 2920
},
{
"epoch": 0.83,
"learning_rate": 7.320360702420504e-06,
"loss": 0.3617,
"step": 2930
},
{
"epoch": 0.83,
"learning_rate": 7.310868533459896e-06,
"loss": 0.3617,
"step": 2940
},
{
"epoch": 0.83,
"learning_rate": 7.301376364499288e-06,
"loss": 0.4053,
"step": 2950
},
{
"epoch": 0.83,
"learning_rate": 7.291884195538681e-06,
"loss": 0.4896,
"step": 2960
},
{
"epoch": 0.84,
"learning_rate": 7.282392026578073e-06,
"loss": 0.385,
"step": 2970
},
{
"epoch": 0.84,
"learning_rate": 7.272899857617466e-06,
"loss": 0.376,
"step": 2980
},
{
"epoch": 0.84,
"learning_rate": 7.2634076886568585e-06,
"loss": 0.49,
"step": 2990
},
{
"epoch": 0.85,
"learning_rate": 7.253915519696251e-06,
"loss": 0.3913,
"step": 3000
},
{
"epoch": 0.85,
"eval_loss": 0.37262919545173645,
"eval_runtime": 210.8205,
"eval_samples_per_second": 9.586,
"eval_steps_per_second": 2.4,
"step": 3000
},
{
"epoch": 0.85,
"learning_rate": 7.244423350735643e-06,
"loss": 0.5203,
"step": 3010
},
{
"epoch": 0.85,
"learning_rate": 7.234931181775036e-06,
"loss": 0.2866,
"step": 3020
},
{
"epoch": 0.85,
"learning_rate": 7.225439012814429e-06,
"loss": 0.322,
"step": 3030
},
{
"epoch": 0.86,
"learning_rate": 7.2159468438538215e-06,
"loss": 0.4069,
"step": 3040
},
{
"epoch": 0.86,
"learning_rate": 7.206454674893214e-06,
"loss": 0.4716,
"step": 3050
},
{
"epoch": 0.86,
"learning_rate": 7.196962505932607e-06,
"loss": 0.6109,
"step": 3060
},
{
"epoch": 0.87,
"learning_rate": 7.187470336971999e-06,
"loss": 0.5786,
"step": 3070
},
{
"epoch": 0.87,
"learning_rate": 7.177978168011391e-06,
"loss": 0.5905,
"step": 3080
},
{
"epoch": 0.87,
"learning_rate": 7.168485999050784e-06,
"loss": 0.425,
"step": 3090
},
{
"epoch": 0.87,
"learning_rate": 7.158993830090176e-06,
"loss": 0.3144,
"step": 3100
},
{
"epoch": 0.88,
"learning_rate": 7.149501661129569e-06,
"loss": 0.4081,
"step": 3110
},
{
"epoch": 0.88,
"learning_rate": 7.1400094921689614e-06,
"loss": 0.3787,
"step": 3120
},
{
"epoch": 0.88,
"learning_rate": 7.130517323208353e-06,
"loss": 0.2301,
"step": 3130
},
{
"epoch": 0.89,
"learning_rate": 7.121025154247746e-06,
"loss": 0.318,
"step": 3140
},
{
"epoch": 0.89,
"learning_rate": 7.111532985287138e-06,
"loss": 0.4586,
"step": 3150
},
{
"epoch": 0.89,
"learning_rate": 7.102040816326531e-06,
"loss": 0.4389,
"step": 3160
},
{
"epoch": 0.89,
"learning_rate": 7.0925486473659236e-06,
"loss": 0.473,
"step": 3170
},
{
"epoch": 0.9,
"learning_rate": 7.083056478405316e-06,
"loss": 0.1068,
"step": 3180
},
{
"epoch": 0.9,
"learning_rate": 7.073564309444708e-06,
"loss": 0.243,
"step": 3190
},
{
"epoch": 0.9,
"learning_rate": 7.064072140484101e-06,
"loss": 0.3434,
"step": 3200
},
{
"epoch": 0.9,
"learning_rate": 7.054579971523494e-06,
"loss": 0.5638,
"step": 3210
},
{
"epoch": 0.91,
"learning_rate": 7.0450878025628865e-06,
"loss": 0.448,
"step": 3220
},
{
"epoch": 0.91,
"learning_rate": 7.035595633602279e-06,
"loss": 0.6347,
"step": 3230
},
{
"epoch": 0.91,
"learning_rate": 7.026103464641672e-06,
"loss": 0.7818,
"step": 3240
},
{
"epoch": 0.92,
"learning_rate": 7.016611295681064e-06,
"loss": 0.4482,
"step": 3250
},
{
"epoch": 0.92,
"learning_rate": 7.007119126720456e-06,
"loss": 0.6219,
"step": 3260
},
{
"epoch": 0.92,
"learning_rate": 6.997626957759849e-06,
"loss": 0.2313,
"step": 3270
},
{
"epoch": 0.92,
"learning_rate": 6.988134788799241e-06,
"loss": 0.1876,
"step": 3280
},
{
"epoch": 0.93,
"learning_rate": 6.978642619838634e-06,
"loss": 0.4771,
"step": 3290
},
{
"epoch": 0.93,
"learning_rate": 6.9691504508780265e-06,
"loss": 0.4634,
"step": 3300
},
{
"epoch": 0.93,
"learning_rate": 6.959658281917418e-06,
"loss": 0.489,
"step": 3310
},
{
"epoch": 0.94,
"learning_rate": 6.950166112956811e-06,
"loss": 0.3812,
"step": 3320
},
{
"epoch": 0.94,
"learning_rate": 6.940673943996203e-06,
"loss": 0.4829,
"step": 3330
},
{
"epoch": 0.94,
"learning_rate": 6.931181775035596e-06,
"loss": 0.495,
"step": 3340
},
{
"epoch": 0.94,
"learning_rate": 6.921689606074989e-06,
"loss": 0.4593,
"step": 3350
},
{
"epoch": 0.95,
"learning_rate": 6.91219743711438e-06,
"loss": 0.3818,
"step": 3360
},
{
"epoch": 0.95,
"learning_rate": 6.902705268153773e-06,
"loss": 0.3838,
"step": 3370
},
{
"epoch": 0.95,
"learning_rate": 6.893213099193166e-06,
"loss": 0.2638,
"step": 3380
},
{
"epoch": 0.96,
"learning_rate": 6.883720930232559e-06,
"loss": 0.3994,
"step": 3390
},
{
"epoch": 0.96,
"learning_rate": 6.874228761271952e-06,
"loss": 0.2879,
"step": 3400
},
{
"epoch": 0.96,
"learning_rate": 6.864736592311344e-06,
"loss": 0.5988,
"step": 3410
},
{
"epoch": 0.96,
"learning_rate": 6.855244423350737e-06,
"loss": 0.4441,
"step": 3420
},
{
"epoch": 0.97,
"learning_rate": 6.8457522543901285e-06,
"loss": 0.4185,
"step": 3430
},
{
"epoch": 0.97,
"learning_rate": 6.836260085429521e-06,
"loss": 0.4659,
"step": 3440
},
{
"epoch": 0.97,
"learning_rate": 6.826767916468914e-06,
"loss": 0.4107,
"step": 3450
},
{
"epoch": 0.98,
"learning_rate": 6.817275747508306e-06,
"loss": 0.2984,
"step": 3460
},
{
"epoch": 0.98,
"learning_rate": 6.807783578547699e-06,
"loss": 0.5467,
"step": 3470
},
{
"epoch": 0.98,
"learning_rate": 6.7982914095870915e-06,
"loss": 0.5183,
"step": 3480
},
{
"epoch": 0.98,
"learning_rate": 6.788799240626483e-06,
"loss": 0.1466,
"step": 3490
},
{
"epoch": 0.99,
"learning_rate": 6.779307071665876e-06,
"loss": 0.2805,
"step": 3500
},
{
"epoch": 0.99,
"eval_loss": 0.36545732617378235,
"eval_runtime": 211.5781,
"eval_samples_per_second": 9.552,
"eval_steps_per_second": 2.392,
"step": 3500
},
{
"epoch": 0.99,
"learning_rate": 6.7698149027052685e-06,
"loss": 0.7174,
"step": 3510
},
{
"epoch": 0.99,
"learning_rate": 6.760322733744661e-06,
"loss": 0.6719,
"step": 3520
},
{
"epoch": 1.0,
"learning_rate": 6.750830564784054e-06,
"loss": 0.4309,
"step": 3530
},
{
"epoch": 1.0,
"learning_rate": 6.741338395823445e-06,
"loss": 0.2661,
"step": 3540
},
{
"epoch": 1.0,
"learning_rate": 6.731846226862838e-06,
"loss": 0.5369,
"step": 3550
},
{
"epoch": 1.0,
"learning_rate": 6.7223540579022314e-06,
"loss": 0.3933,
"step": 3560
},
{
"epoch": 1.01,
"learning_rate": 6.712861888941624e-06,
"loss": 0.456,
"step": 3570
},
{
"epoch": 1.01,
"learning_rate": 6.703369719981017e-06,
"loss": 0.7618,
"step": 3580
},
{
"epoch": 1.01,
"learning_rate": 6.693877551020409e-06,
"loss": 0.5126,
"step": 3590
},
{
"epoch": 1.01,
"learning_rate": 6.684385382059802e-06,
"loss": 0.2889,
"step": 3600
},
{
"epoch": 1.02,
"learning_rate": 6.6748932130991936e-06,
"loss": 0.6109,
"step": 3610
},
{
"epoch": 1.02,
"learning_rate": 6.665401044138586e-06,
"loss": 0.3158,
"step": 3620
},
{
"epoch": 1.02,
"learning_rate": 6.655908875177979e-06,
"loss": 0.4721,
"step": 3630
},
{
"epoch": 1.03,
"learning_rate": 6.646416706217371e-06,
"loss": 0.4659,
"step": 3640
},
{
"epoch": 1.03,
"learning_rate": 6.636924537256764e-06,
"loss": 0.4783,
"step": 3650
},
{
"epoch": 1.03,
"learning_rate": 6.627432368296156e-06,
"loss": 0.3333,
"step": 3660
},
{
"epoch": 1.03,
"learning_rate": 6.617940199335548e-06,
"loss": 0.3447,
"step": 3670
},
{
"epoch": 1.04,
"learning_rate": 6.608448030374941e-06,
"loss": 0.5593,
"step": 3680
},
{
"epoch": 1.04,
"learning_rate": 6.5989558614143335e-06,
"loss": 0.3474,
"step": 3690
},
{
"epoch": 1.04,
"learning_rate": 6.589463692453726e-06,
"loss": 0.5889,
"step": 3700
},
{
"epoch": 1.05,
"learning_rate": 6.579971523493119e-06,
"loss": 0.1262,
"step": 3710
},
{
"epoch": 1.05,
"learning_rate": 6.5704793545325104e-06,
"loss": 0.5701,
"step": 3720
},
{
"epoch": 1.05,
"learning_rate": 6.560987185571903e-06,
"loss": 0.6356,
"step": 3730
},
{
"epoch": 1.05,
"learning_rate": 6.5514950166112965e-06,
"loss": 0.5827,
"step": 3740
},
{
"epoch": 1.06,
"learning_rate": 6.542002847650689e-06,
"loss": 0.4105,
"step": 3750
},
{
"epoch": 1.06,
"learning_rate": 6.532510678690082e-06,
"loss": 0.4191,
"step": 3760
},
{
"epoch": 1.06,
"learning_rate": 6.523018509729474e-06,
"loss": 0.3027,
"step": 3770
},
{
"epoch": 1.07,
"learning_rate": 6.513526340768867e-06,
"loss": 0.395,
"step": 3780
},
{
"epoch": 1.07,
"learning_rate": 6.504034171808259e-06,
"loss": 0.2587,
"step": 3790
},
{
"epoch": 1.07,
"learning_rate": 6.494542002847651e-06,
"loss": 0.2284,
"step": 3800
},
{
"epoch": 1.07,
"learning_rate": 6.485049833887044e-06,
"loss": 0.4834,
"step": 3810
},
{
"epoch": 1.08,
"learning_rate": 6.475557664926436e-06,
"loss": 0.1843,
"step": 3820
},
{
"epoch": 1.08,
"learning_rate": 6.466065495965829e-06,
"loss": 0.5196,
"step": 3830
},
{
"epoch": 1.08,
"learning_rate": 6.456573327005221e-06,
"loss": 0.4602,
"step": 3840
},
{
"epoch": 1.09,
"learning_rate": 6.447081158044613e-06,
"loss": 0.1406,
"step": 3850
},
{
"epoch": 1.09,
"learning_rate": 6.437588989084006e-06,
"loss": 0.3493,
"step": 3860
},
{
"epoch": 1.09,
"learning_rate": 6.4280968201233985e-06,
"loss": 0.4312,
"step": 3870
},
{
"epoch": 1.09,
"learning_rate": 6.418604651162791e-06,
"loss": 0.5843,
"step": 3880
},
{
"epoch": 1.1,
"learning_rate": 6.409112482202183e-06,
"loss": 0.4368,
"step": 3890
},
{
"epoch": 1.1,
"learning_rate": 6.3996203132415755e-06,
"loss": 0.4638,
"step": 3900
},
{
"epoch": 1.1,
"learning_rate": 6.390128144280968e-06,
"loss": 0.5721,
"step": 3910
},
{
"epoch": 1.11,
"learning_rate": 6.3806359753203615e-06,
"loss": 0.2774,
"step": 3920
},
{
"epoch": 1.11,
"learning_rate": 6.371143806359754e-06,
"loss": 0.641,
"step": 3930
},
{
"epoch": 1.11,
"learning_rate": 6.361651637399147e-06,
"loss": 0.3003,
"step": 3940
},
{
"epoch": 1.11,
"learning_rate": 6.352159468438539e-06,
"loss": 0.5912,
"step": 3950
},
{
"epoch": 1.12,
"learning_rate": 6.342667299477931e-06,
"loss": 0.5673,
"step": 3960
},
{
"epoch": 1.12,
"learning_rate": 6.333175130517324e-06,
"loss": 0.3721,
"step": 3970
},
{
"epoch": 1.12,
"learning_rate": 6.323682961556716e-06,
"loss": 0.5748,
"step": 3980
},
{
"epoch": 1.12,
"learning_rate": 6.314190792596109e-06,
"loss": 0.384,
"step": 3990
},
{
"epoch": 1.13,
"learning_rate": 6.3046986236355014e-06,
"loss": 0.6733,
"step": 4000
},
{
"epoch": 1.13,
"eval_loss": 0.3598354458808899,
"eval_runtime": 209.9575,
"eval_samples_per_second": 9.626,
"eval_steps_per_second": 2.41,
"step": 4000
},
{
"epoch": 1.13,
"learning_rate": 6.295206454674894e-06,
"loss": 0.29,
"step": 4010
},
{
"epoch": 1.13,
"learning_rate": 6.285714285714286e-06,
"loss": 0.2849,
"step": 4020
},
{
"epoch": 1.14,
"learning_rate": 6.276222116753678e-06,
"loss": 0.5834,
"step": 4030
},
{
"epoch": 1.14,
"learning_rate": 6.266729947793071e-06,
"loss": 0.1479,
"step": 4040
},
{
"epoch": 1.14,
"learning_rate": 6.257237778832464e-06,
"loss": 0.5803,
"step": 4050
},
{
"epoch": 1.14,
"learning_rate": 6.247745609871856e-06,
"loss": 0.2966,
"step": 4060
},
{
"epoch": 1.15,
"learning_rate": 6.238253440911248e-06,
"loss": 0.3872,
"step": 4070
},
{
"epoch": 1.15,
"learning_rate": 6.2287612719506405e-06,
"loss": 0.4973,
"step": 4080
},
{
"epoch": 1.15,
"learning_rate": 6.219269102990034e-06,
"loss": 0.6415,
"step": 4090
},
{
"epoch": 1.16,
"learning_rate": 6.2097769340294266e-06,
"loss": 0.4729,
"step": 4100
},
{
"epoch": 1.16,
"learning_rate": 6.200284765068819e-06,
"loss": 0.5401,
"step": 4110
},
{
"epoch": 1.16,
"learning_rate": 6.190792596108212e-06,
"loss": 0.2515,
"step": 4120
},
{
"epoch": 1.16,
"learning_rate": 6.181300427147604e-06,
"loss": 0.3167,
"step": 4130
},
{
"epoch": 1.17,
"learning_rate": 6.171808258186996e-06,
"loss": 0.6597,
"step": 4140
},
{
"epoch": 1.17,
"learning_rate": 6.162316089226389e-06,
"loss": 0.4192,
"step": 4150
},
{
"epoch": 1.17,
"learning_rate": 6.152823920265781e-06,
"loss": 0.2399,
"step": 4160
},
{
"epoch": 1.18,
"learning_rate": 6.143331751305174e-06,
"loss": 0.3981,
"step": 4170
},
{
"epoch": 1.18,
"learning_rate": 6.1338395823445665e-06,
"loss": 0.3968,
"step": 4180
},
{
"epoch": 1.18,
"learning_rate": 6.124347413383958e-06,
"loss": 0.3704,
"step": 4190
},
{
"epoch": 1.18,
"learning_rate": 6.114855244423351e-06,
"loss": 0.4162,
"step": 4200
},
{
"epoch": 1.19,
"learning_rate": 6.1053630754627434e-06,
"loss": 0.3179,
"step": 4210
},
{
"epoch": 1.19,
"learning_rate": 6.095870906502136e-06,
"loss": 0.4292,
"step": 4220
},
{
"epoch": 1.19,
"learning_rate": 6.086378737541529e-06,
"loss": 0.4461,
"step": 4230
},
{
"epoch": 1.2,
"learning_rate": 6.076886568580921e-06,
"loss": 0.4048,
"step": 4240
},
{
"epoch": 1.2,
"learning_rate": 6.067394399620313e-06,
"loss": 0.4935,
"step": 4250
},
{
"epoch": 1.2,
"learning_rate": 6.0579022306597056e-06,
"loss": 0.2367,
"step": 4260
},
{
"epoch": 1.2,
"learning_rate": 6.048410061699099e-06,
"loss": 0.354,
"step": 4270
},
{
"epoch": 1.21,
"learning_rate": 6.038917892738492e-06,
"loss": 0.705,
"step": 4280
},
{
"epoch": 1.21,
"learning_rate": 6.029425723777884e-06,
"loss": 0.5404,
"step": 4290
},
{
"epoch": 1.21,
"learning_rate": 6.019933554817277e-06,
"loss": 0.4148,
"step": 4300
},
{
"epoch": 1.22,
"learning_rate": 6.010441385856669e-06,
"loss": 0.2862,
"step": 4310
},
{
"epoch": 1.22,
"learning_rate": 6.000949216896061e-06,
"loss": 0.5574,
"step": 4320
},
{
"epoch": 1.22,
"learning_rate": 5.991457047935454e-06,
"loss": 0.3857,
"step": 4330
},
{
"epoch": 1.22,
"learning_rate": 5.981964878974846e-06,
"loss": 0.4672,
"step": 4340
},
{
"epoch": 1.23,
"learning_rate": 5.972472710014239e-06,
"loss": 0.5925,
"step": 4350
},
{
"epoch": 1.23,
"learning_rate": 5.9629805410536315e-06,
"loss": 0.4423,
"step": 4360
},
{
"epoch": 1.23,
"learning_rate": 5.953488372093023e-06,
"loss": 0.4828,
"step": 4370
},
{
"epoch": 1.23,
"learning_rate": 5.943996203132416e-06,
"loss": 0.5208,
"step": 4380
},
{
"epoch": 1.24,
"learning_rate": 5.9345040341718085e-06,
"loss": 0.3895,
"step": 4390
},
{
"epoch": 1.24,
"learning_rate": 5.925011865211201e-06,
"loss": 0.4667,
"step": 4400
},
{
"epoch": 1.24,
"learning_rate": 5.915519696250594e-06,
"loss": 0.5809,
"step": 4410
},
{
"epoch": 1.25,
"learning_rate": 5.906027527289986e-06,
"loss": 0.4157,
"step": 4420
},
{
"epoch": 1.25,
"learning_rate": 5.896535358329378e-06,
"loss": 0.2807,
"step": 4430
},
{
"epoch": 1.25,
"learning_rate": 5.887043189368771e-06,
"loss": 0.5036,
"step": 4440
},
{
"epoch": 1.25,
"learning_rate": 5.877551020408164e-06,
"loss": 0.3498,
"step": 4450
},
{
"epoch": 1.26,
"learning_rate": 5.868058851447557e-06,
"loss": 0.46,
"step": 4460
},
{
"epoch": 1.26,
"learning_rate": 5.858566682486949e-06,
"loss": 0.2416,
"step": 4470
},
{
"epoch": 1.26,
"learning_rate": 5.849074513526342e-06,
"loss": 0.3104,
"step": 4480
},
{
"epoch": 1.27,
"learning_rate": 5.8395823445657344e-06,
"loss": 0.6617,
"step": 4490
},
{
"epoch": 1.27,
"learning_rate": 5.830090175605126e-06,
"loss": 0.2603,
"step": 4500
},
{
"epoch": 1.27,
"eval_loss": 0.3565267324447632,
"eval_runtime": 210.4541,
"eval_samples_per_second": 9.603,
"eval_steps_per_second": 2.404,
"step": 4500
},
{
"epoch": 1.27,
"learning_rate": 5.820598006644519e-06,
"loss": 0.1701,
"step": 4510
},
{
"epoch": 1.27,
"learning_rate": 5.811105837683911e-06,
"loss": 0.3745,
"step": 4520
},
{
"epoch": 1.28,
"learning_rate": 5.801613668723304e-06,
"loss": 0.296,
"step": 4530
},
{
"epoch": 1.28,
"learning_rate": 5.7921214997626966e-06,
"loss": 0.1971,
"step": 4540
},
{
"epoch": 1.28,
"learning_rate": 5.782629330802088e-06,
"loss": 0.3824,
"step": 4550
},
{
"epoch": 1.29,
"learning_rate": 5.773137161841481e-06,
"loss": 0.4496,
"step": 4560
},
{
"epoch": 1.29,
"learning_rate": 5.7636449928808735e-06,
"loss": 0.3678,
"step": 4570
},
{
"epoch": 1.29,
"learning_rate": 5.754152823920266e-06,
"loss": 0.368,
"step": 4580
},
{
"epoch": 1.29,
"learning_rate": 5.744660654959659e-06,
"loss": 0.4836,
"step": 4590
},
{
"epoch": 1.3,
"learning_rate": 5.7351684859990505e-06,
"loss": 0.4723,
"step": 4600
},
{
"epoch": 1.3,
"learning_rate": 5.725676317038443e-06,
"loss": 0.2437,
"step": 4610
},
{
"epoch": 1.3,
"learning_rate": 5.716184148077836e-06,
"loss": 0.1526,
"step": 4620
},
{
"epoch": 1.31,
"learning_rate": 5.706691979117229e-06,
"loss": 0.1451,
"step": 4630
},
{
"epoch": 1.31,
"learning_rate": 5.697199810156622e-06,
"loss": 0.4266,
"step": 4640
},
{
"epoch": 1.31,
"learning_rate": 5.687707641196014e-06,
"loss": 0.5522,
"step": 4650
},
{
"epoch": 1.31,
"learning_rate": 5.678215472235407e-06,
"loss": 0.4187,
"step": 4660
},
{
"epoch": 1.32,
"learning_rate": 5.668723303274799e-06,
"loss": 0.3749,
"step": 4670
},
{
"epoch": 1.32,
"learning_rate": 5.659231134314191e-06,
"loss": 0.156,
"step": 4680
},
{
"epoch": 1.32,
"learning_rate": 5.649738965353584e-06,
"loss": 0.1152,
"step": 4690
},
{
"epoch": 1.33,
"learning_rate": 5.6402467963929764e-06,
"loss": 0.4565,
"step": 4700
},
{
"epoch": 1.33,
"learning_rate": 5.630754627432369e-06,
"loss": 0.328,
"step": 4710
},
{
"epoch": 1.33,
"learning_rate": 5.621262458471762e-06,
"loss": 0.4919,
"step": 4720
},
{
"epoch": 1.33,
"learning_rate": 5.611770289511153e-06,
"loss": 0.3646,
"step": 4730
},
{
"epoch": 1.34,
"learning_rate": 5.602278120550546e-06,
"loss": 0.3677,
"step": 4740
},
{
"epoch": 1.34,
"learning_rate": 5.5927859515899386e-06,
"loss": 0.2724,
"step": 4750
},
{
"epoch": 1.34,
"learning_rate": 5.583293782629331e-06,
"loss": 0.3406,
"step": 4760
},
{
"epoch": 1.34,
"learning_rate": 5.573801613668724e-06,
"loss": 0.4709,
"step": 4770
},
{
"epoch": 1.35,
"learning_rate": 5.5643094447081155e-06,
"loss": 0.3161,
"step": 4780
},
{
"epoch": 1.35,
"learning_rate": 5.554817275747508e-06,
"loss": 0.4555,
"step": 4790
},
{
"epoch": 1.35,
"learning_rate": 5.545325106786901e-06,
"loss": 0.4641,
"step": 4800
},
{
"epoch": 1.36,
"learning_rate": 5.535832937826294e-06,
"loss": 0.4858,
"step": 4810
},
{
"epoch": 1.36,
"learning_rate": 5.526340768865687e-06,
"loss": 0.4066,
"step": 4820
},
{
"epoch": 1.36,
"learning_rate": 5.516848599905079e-06,
"loss": 0.4246,
"step": 4830
},
{
"epoch": 1.36,
"learning_rate": 5.507356430944472e-06,
"loss": 0.3599,
"step": 4840
},
{
"epoch": 1.37,
"learning_rate": 5.497864261983864e-06,
"loss": 0.4388,
"step": 4850
},
{
"epoch": 1.37,
"learning_rate": 5.488372093023256e-06,
"loss": 0.2378,
"step": 4860
},
{
"epoch": 1.37,
"learning_rate": 5.478879924062649e-06,
"loss": 0.512,
"step": 4870
},
{
"epoch": 1.38,
"learning_rate": 5.4693877551020415e-06,
"loss": 0.5356,
"step": 4880
},
{
"epoch": 1.38,
"learning_rate": 5.459895586141434e-06,
"loss": 0.723,
"step": 4890
},
{
"epoch": 1.38,
"learning_rate": 5.450403417180826e-06,
"loss": 0.4442,
"step": 4900
},
{
"epoch": 1.38,
"learning_rate": 5.440911248220218e-06,
"loss": 0.5757,
"step": 4910
},
{
"epoch": 1.39,
"learning_rate": 5.431419079259611e-06,
"loss": 0.3083,
"step": 4920
},
{
"epoch": 1.39,
"learning_rate": 5.421926910299004e-06,
"loss": 0.3306,
"step": 4930
},
{
"epoch": 1.39,
"learning_rate": 5.412434741338396e-06,
"loss": 0.3638,
"step": 4940
},
{
"epoch": 1.4,
"learning_rate": 5.402942572377789e-06,
"loss": 0.3285,
"step": 4950
},
{
"epoch": 1.4,
"learning_rate": 5.3934504034171805e-06,
"loss": 0.5824,
"step": 4960
},
{
"epoch": 1.4,
"learning_rate": 5.383958234456573e-06,
"loss": 0.4002,
"step": 4970
},
{
"epoch": 1.4,
"learning_rate": 5.374466065495966e-06,
"loss": 0.1342,
"step": 4980
},
{
"epoch": 1.41,
"learning_rate": 5.364973896535359e-06,
"loss": 0.3491,
"step": 4990
},
{
"epoch": 1.41,
"learning_rate": 5.355481727574752e-06,
"loss": 0.5927,
"step": 5000
},
{
"epoch": 1.41,
"eval_loss": 0.3536190092563629,
"eval_runtime": 209.407,
"eval_samples_per_second": 9.651,
"eval_steps_per_second": 2.416,
"step": 5000
},
{
"epoch": 1.41,
"learning_rate": 5.345989558614144e-06,
"loss": 0.1028,
"step": 5010
},
{
"epoch": 1.42,
"learning_rate": 5.336497389653537e-06,
"loss": 0.206,
"step": 5020
},
{
"epoch": 1.42,
"learning_rate": 5.327005220692929e-06,
"loss": 0.2538,
"step": 5030
},
{
"epoch": 1.42,
"learning_rate": 5.317513051732321e-06,
"loss": 0.4764,
"step": 5040
},
{
"epoch": 1.42,
"learning_rate": 5.308020882771714e-06,
"loss": 0.4456,
"step": 5050
},
{
"epoch": 1.43,
"learning_rate": 5.2985287138111065e-06,
"loss": 0.2988,
"step": 5060
},
{
"epoch": 1.43,
"learning_rate": 5.289036544850499e-06,
"loss": 0.4721,
"step": 5070
},
{
"epoch": 1.43,
"learning_rate": 5.279544375889891e-06,
"loss": 0.4259,
"step": 5080
},
{
"epoch": 1.44,
"learning_rate": 5.2700522069292835e-06,
"loss": 0.1379,
"step": 5090
},
{
"epoch": 1.44,
"learning_rate": 5.260560037968676e-06,
"loss": 0.4755,
"step": 5100
},
{
"epoch": 1.44,
"learning_rate": 5.251067869008069e-06,
"loss": 0.2663,
"step": 5110
},
{
"epoch": 1.44,
"learning_rate": 5.241575700047461e-06,
"loss": 0.3229,
"step": 5120
},
{
"epoch": 1.45,
"learning_rate": 5.232083531086853e-06,
"loss": 0.2666,
"step": 5130
},
{
"epoch": 1.45,
"learning_rate": 5.222591362126246e-06,
"loss": 0.3572,
"step": 5140
},
{
"epoch": 1.45,
"learning_rate": 5.213099193165638e-06,
"loss": 0.4938,
"step": 5150
},
{
"epoch": 1.45,
"learning_rate": 5.203607024205031e-06,
"loss": 0.5069,
"step": 5160
},
{
"epoch": 1.46,
"learning_rate": 5.194114855244424e-06,
"loss": 0.6373,
"step": 5170
},
{
"epoch": 1.46,
"learning_rate": 5.184622686283817e-06,
"loss": 0.4395,
"step": 5180
},
{
"epoch": 1.46,
"learning_rate": 5.175130517323209e-06,
"loss": 0.3435,
"step": 5190
},
{
"epoch": 1.47,
"learning_rate": 5.165638348362601e-06,
"loss": 0.2505,
"step": 5200
},
{
"epoch": 1.47,
"learning_rate": 5.156146179401994e-06,
"loss": 0.4521,
"step": 5210
},
{
"epoch": 1.47,
"learning_rate": 5.146654010441386e-06,
"loss": 0.4649,
"step": 5220
},
{
"epoch": 1.47,
"learning_rate": 5.137161841480779e-06,
"loss": 0.429,
"step": 5230
},
{
"epoch": 1.48,
"learning_rate": 5.1276696725201716e-06,
"loss": 0.4792,
"step": 5240
},
{
"epoch": 1.48,
"learning_rate": 5.118177503559564e-06,
"loss": 0.4247,
"step": 5250
},
{
"epoch": 1.48,
"learning_rate": 5.108685334598956e-06,
"loss": 0.5332,
"step": 5260
},
{
"epoch": 1.49,
"learning_rate": 5.0991931656383485e-06,
"loss": 0.1256,
"step": 5270
},
{
"epoch": 1.49,
"learning_rate": 5.089700996677741e-06,
"loss": 0.6229,
"step": 5280
},
{
"epoch": 1.49,
"learning_rate": 5.080208827717134e-06,
"loss": 0.5491,
"step": 5290
},
{
"epoch": 1.49,
"learning_rate": 5.070716658756526e-06,
"loss": 0.5865,
"step": 5300
},
{
"epoch": 1.5,
"learning_rate": 5.061224489795918e-06,
"loss": 0.3055,
"step": 5310
},
{
"epoch": 1.5,
"learning_rate": 5.051732320835311e-06,
"loss": 0.2392,
"step": 5320
},
{
"epoch": 1.5,
"learning_rate": 5.042240151874703e-06,
"loss": 0.2454,
"step": 5330
},
{
"epoch": 1.51,
"learning_rate": 5.032747982914097e-06,
"loss": 0.3592,
"step": 5340
},
{
"epoch": 1.51,
"learning_rate": 5.023255813953489e-06,
"loss": 0.388,
"step": 5350
},
{
"epoch": 1.51,
"learning_rate": 5.013763644992882e-06,
"loss": 0.1818,
"step": 5360
},
{
"epoch": 1.51,
"learning_rate": 5.0042714760322745e-06,
"loss": 0.3422,
"step": 5370
},
{
"epoch": 1.52,
"learning_rate": 4.994779307071666e-06,
"loss": 0.4801,
"step": 5380
},
{
"epoch": 1.52,
"learning_rate": 4.985287138111059e-06,
"loss": 0.7783,
"step": 5390
},
{
"epoch": 1.52,
"learning_rate": 4.975794969150451e-06,
"loss": 0.1979,
"step": 5400
},
{
"epoch": 1.53,
"learning_rate": 4.966302800189844e-06,
"loss": 0.1784,
"step": 5410
},
{
"epoch": 1.53,
"learning_rate": 4.956810631229237e-06,
"loss": 0.3514,
"step": 5420
},
{
"epoch": 1.53,
"learning_rate": 4.947318462268629e-06,
"loss": 0.4134,
"step": 5430
},
{
"epoch": 1.53,
"learning_rate": 4.937826293308021e-06,
"loss": 0.3057,
"step": 5440
},
{
"epoch": 1.54,
"learning_rate": 4.9283341243474135e-06,
"loss": 0.5433,
"step": 5450
},
{
"epoch": 1.54,
"learning_rate": 4.918841955386806e-06,
"loss": 0.6126,
"step": 5460
},
{
"epoch": 1.54,
"learning_rate": 4.909349786426199e-06,
"loss": 0.2329,
"step": 5470
},
{
"epoch": 1.54,
"learning_rate": 4.899857617465591e-06,
"loss": 0.3048,
"step": 5480
},
{
"epoch": 1.55,
"learning_rate": 4.890365448504984e-06,
"loss": 0.6663,
"step": 5490
},
{
"epoch": 1.55,
"learning_rate": 4.8808732795443765e-06,
"loss": 0.3444,
"step": 5500
},
{
"epoch": 1.55,
"eval_loss": 0.3520536720752716,
"eval_runtime": 210.3626,
"eval_samples_per_second": 9.607,
"eval_steps_per_second": 2.405,
"step": 5500
},
{
"epoch": 1.55,
"learning_rate": 4.871381110583769e-06,
"loss": 0.7078,
"step": 5510
},
{
"epoch": 1.56,
"learning_rate": 4.861888941623161e-06,
"loss": 0.3893,
"step": 5520
},
{
"epoch": 1.56,
"learning_rate": 4.8523967726625535e-06,
"loss": 0.3299,
"step": 5530
},
{
"epoch": 1.56,
"learning_rate": 4.842904603701946e-06,
"loss": 0.2718,
"step": 5540
},
{
"epoch": 1.56,
"learning_rate": 4.833412434741339e-06,
"loss": 0.4207,
"step": 5550
},
{
"epoch": 1.57,
"learning_rate": 4.823920265780731e-06,
"loss": 0.2569,
"step": 5560
},
{
"epoch": 1.57,
"learning_rate": 4.814428096820124e-06,
"loss": 0.3158,
"step": 5570
},
{
"epoch": 1.57,
"learning_rate": 4.8049359278595164e-06,
"loss": 0.4117,
"step": 5580
},
{
"epoch": 1.58,
"learning_rate": 4.795443758898909e-06,
"loss": 0.4646,
"step": 5590
},
{
"epoch": 1.58,
"learning_rate": 4.785951589938302e-06,
"loss": 0.3877,
"step": 5600
},
{
"epoch": 1.58,
"learning_rate": 4.776459420977693e-06,
"loss": 0.2987,
"step": 5610
},
{
"epoch": 1.58,
"learning_rate": 4.766967252017086e-06,
"loss": 0.3859,
"step": 5620
},
{
"epoch": 1.59,
"learning_rate": 4.757475083056479e-06,
"loss": 0.138,
"step": 5630
},
{
"epoch": 1.59,
"learning_rate": 4.747982914095871e-06,
"loss": 0.2836,
"step": 5640
},
{
"epoch": 1.59,
"learning_rate": 4.738490745135264e-06,
"loss": 0.4993,
"step": 5650
},
{
"epoch": 1.6,
"learning_rate": 4.728998576174656e-06,
"loss": 0.4256,
"step": 5660
},
{
"epoch": 1.6,
"learning_rate": 4.719506407214049e-06,
"loss": 0.5983,
"step": 5670
},
{
"epoch": 1.6,
"learning_rate": 4.7100142382534416e-06,
"loss": 0.4265,
"step": 5680
},
{
"epoch": 1.6,
"learning_rate": 4.700522069292834e-06,
"loss": 0.2854,
"step": 5690
},
{
"epoch": 1.61,
"learning_rate": 4.691029900332226e-06,
"loss": 0.3984,
"step": 5700
},
{
"epoch": 1.61,
"learning_rate": 4.6815377313716185e-06,
"loss": 0.34,
"step": 5710
},
{
"epoch": 1.61,
"learning_rate": 4.672045562411011e-06,
"loss": 0.4355,
"step": 5720
},
{
"epoch": 1.62,
"learning_rate": 4.662553393450404e-06,
"loss": 0.4004,
"step": 5730
},
{
"epoch": 1.62,
"learning_rate": 4.653061224489796e-06,
"loss": 0.3462,
"step": 5740
},
{
"epoch": 1.62,
"learning_rate": 4.643569055529189e-06,
"loss": 0.3231,
"step": 5750
},
{
"epoch": 1.62,
"learning_rate": 4.6340768865685815e-06,
"loss": 0.6457,
"step": 5760
},
{
"epoch": 1.63,
"learning_rate": 4.624584717607974e-06,
"loss": 0.4086,
"step": 5770
},
{
"epoch": 1.63,
"learning_rate": 4.615092548647367e-06,
"loss": 0.2528,
"step": 5780
},
{
"epoch": 1.63,
"learning_rate": 4.6056003796867584e-06,
"loss": 0.5488,
"step": 5790
},
{
"epoch": 1.64,
"learning_rate": 4.596108210726151e-06,
"loss": 0.381,
"step": 5800
},
{
"epoch": 1.64,
"learning_rate": 4.586616041765544e-06,
"loss": 0.5675,
"step": 5810
},
{
"epoch": 1.64,
"learning_rate": 4.577123872804936e-06,
"loss": 0.5866,
"step": 5820
},
{
"epoch": 1.64,
"learning_rate": 4.567631703844329e-06,
"loss": 0.4035,
"step": 5830
},
{
"epoch": 1.65,
"learning_rate": 4.558139534883721e-06,
"loss": 0.2322,
"step": 5840
},
{
"epoch": 1.65,
"learning_rate": 4.548647365923114e-06,
"loss": 0.717,
"step": 5850
},
{
"epoch": 1.65,
"learning_rate": 4.539155196962507e-06,
"loss": 0.5347,
"step": 5860
},
{
"epoch": 1.65,
"learning_rate": 4.529663028001899e-06,
"loss": 0.2423,
"step": 5870
},
{
"epoch": 1.66,
"learning_rate": 4.520170859041291e-06,
"loss": 0.439,
"step": 5880
},
{
"epoch": 1.66,
"learning_rate": 4.5106786900806835e-06,
"loss": 0.4595,
"step": 5890
},
{
"epoch": 1.66,
"learning_rate": 4.501186521120076e-06,
"loss": 0.5906,
"step": 5900
},
{
"epoch": 1.67,
"learning_rate": 4.491694352159469e-06,
"loss": 0.5294,
"step": 5910
},
{
"epoch": 1.67,
"learning_rate": 4.482202183198861e-06,
"loss": 0.2951,
"step": 5920
},
{
"epoch": 1.67,
"learning_rate": 4.472710014238254e-06,
"loss": 0.6254,
"step": 5930
},
{
"epoch": 1.67,
"learning_rate": 4.4632178452776465e-06,
"loss": 0.5945,
"step": 5940
},
{
"epoch": 1.68,
"learning_rate": 4.453725676317039e-06,
"loss": 0.4814,
"step": 5950
},
{
"epoch": 1.68,
"learning_rate": 4.444233507356432e-06,
"loss": 0.4048,
"step": 5960
},
{
"epoch": 1.68,
"learning_rate": 4.4347413383958235e-06,
"loss": 0.1721,
"step": 5970
},
{
"epoch": 1.69,
"learning_rate": 4.425249169435216e-06,
"loss": 0.2362,
"step": 5980
},
{
"epoch": 1.69,
"learning_rate": 4.415757000474609e-06,
"loss": 0.2302,
"step": 5990
},
{
"epoch": 1.69,
"learning_rate": 4.406264831514001e-06,
"loss": 0.4041,
"step": 6000
},
{
"epoch": 1.69,
"eval_loss": 0.34984728693962097,
"eval_runtime": 212.0644,
"eval_samples_per_second": 9.53,
"eval_steps_per_second": 2.386,
"step": 6000
},
{
"epoch": 1.69,
"learning_rate": 4.396772662553394e-06,
"loss": 0.8092,
"step": 6010
},
{
"epoch": 1.7,
"learning_rate": 4.3872804935927865e-06,
"loss": 0.327,
"step": 6020
},
{
"epoch": 1.7,
"learning_rate": 4.377788324632179e-06,
"loss": 0.3231,
"step": 6030
},
{
"epoch": 1.7,
"learning_rate": 4.368296155671572e-06,
"loss": 0.4055,
"step": 6040
},
{
"epoch": 1.71,
"learning_rate": 4.358803986710964e-06,
"loss": 0.2245,
"step": 6050
},
{
"epoch": 1.71,
"learning_rate": 4.349311817750356e-06,
"loss": 0.4406,
"step": 6060
},
{
"epoch": 1.71,
"learning_rate": 4.339819648789749e-06,
"loss": 0.2078,
"step": 6070
},
{
"epoch": 1.71,
"learning_rate": 4.330327479829141e-06,
"loss": 0.5263,
"step": 6080
},
{
"epoch": 1.72,
"learning_rate": 4.320835310868534e-06,
"loss": 0.6044,
"step": 6090
},
{
"epoch": 1.72,
"learning_rate": 4.311343141907926e-06,
"loss": 0.3704,
"step": 6100
},
{
"epoch": 1.72,
"learning_rate": 4.301850972947319e-06,
"loss": 0.4697,
"step": 6110
},
{
"epoch": 1.73,
"learning_rate": 4.2923588039867116e-06,
"loss": 0.3077,
"step": 6120
},
{
"epoch": 1.73,
"learning_rate": 4.282866635026104e-06,
"loss": 0.4685,
"step": 6130
},
{
"epoch": 1.73,
"learning_rate": 4.273374466065496e-06,
"loss": 0.4972,
"step": 6140
},
{
"epoch": 1.73,
"learning_rate": 4.2638822971048885e-06,
"loss": 0.4597,
"step": 6150
},
{
"epoch": 1.74,
"learning_rate": 4.254390128144281e-06,
"loss": 0.2447,
"step": 6160
},
{
"epoch": 1.74,
"learning_rate": 4.244897959183674e-06,
"loss": 0.5126,
"step": 6170
},
{
"epoch": 1.74,
"learning_rate": 4.235405790223066e-06,
"loss": 0.3077,
"step": 6180
},
{
"epoch": 1.75,
"learning_rate": 4.225913621262459e-06,
"loss": 0.3864,
"step": 6190
},
{
"epoch": 1.75,
"learning_rate": 4.2164214523018515e-06,
"loss": 0.5483,
"step": 6200
},
{
"epoch": 1.75,
"learning_rate": 4.206929283341244e-06,
"loss": 0.5613,
"step": 6210
},
{
"epoch": 1.75,
"learning_rate": 4.197437114380637e-06,
"loss": 0.3505,
"step": 6220
},
{
"epoch": 1.76,
"learning_rate": 4.1879449454200284e-06,
"loss": 0.4028,
"step": 6230
},
{
"epoch": 1.76,
"learning_rate": 4.178452776459421e-06,
"loss": 0.3838,
"step": 6240
},
{
"epoch": 1.76,
"learning_rate": 4.168960607498814e-06,
"loss": 0.2545,
"step": 6250
},
{
"epoch": 1.76,
"learning_rate": 4.159468438538206e-06,
"loss": 0.3897,
"step": 6260
},
{
"epoch": 1.77,
"learning_rate": 4.149976269577599e-06,
"loss": 0.1108,
"step": 6270
},
{
"epoch": 1.77,
"learning_rate": 4.1404841006169914e-06,
"loss": 0.2364,
"step": 6280
},
{
"epoch": 1.77,
"learning_rate": 4.130991931656384e-06,
"loss": 0.4792,
"step": 6290
},
{
"epoch": 1.78,
"learning_rate": 4.121499762695777e-06,
"loss": 0.3424,
"step": 6300
},
{
"epoch": 1.78,
"learning_rate": 4.112007593735169e-06,
"loss": 0.3274,
"step": 6310
},
{
"epoch": 1.78,
"learning_rate": 4.102515424774561e-06,
"loss": 0.2145,
"step": 6320
},
{
"epoch": 1.78,
"learning_rate": 4.0930232558139536e-06,
"loss": 0.4916,
"step": 6330
},
{
"epoch": 1.79,
"learning_rate": 4.083531086853346e-06,
"loss": 0.4363,
"step": 6340
},
{
"epoch": 1.79,
"learning_rate": 4.074038917892739e-06,
"loss": 0.3422,
"step": 6350
},
{
"epoch": 1.79,
"learning_rate": 4.064546748932131e-06,
"loss": 0.4339,
"step": 6360
},
{
"epoch": 1.8,
"learning_rate": 4.055054579971524e-06,
"loss": 0.5902,
"step": 6370
},
{
"epoch": 1.8,
"learning_rate": 4.0455624110109165e-06,
"loss": 0.3237,
"step": 6380
},
{
"epoch": 1.8,
"learning_rate": 4.036070242050309e-06,
"loss": 0.4783,
"step": 6390
},
{
"epoch": 1.8,
"learning_rate": 4.026578073089702e-06,
"loss": 0.4535,
"step": 6400
},
{
"epoch": 1.81,
"learning_rate": 4.0170859041290935e-06,
"loss": 0.5848,
"step": 6410
},
{
"epoch": 1.81,
"learning_rate": 4.007593735168486e-06,
"loss": 0.501,
"step": 6420
},
{
"epoch": 1.81,
"learning_rate": 3.998101566207879e-06,
"loss": 0.5878,
"step": 6430
},
{
"epoch": 1.82,
"learning_rate": 3.988609397247271e-06,
"loss": 0.2525,
"step": 6440
},
{
"epoch": 1.82,
"learning_rate": 3.979117228286664e-06,
"loss": 0.1343,
"step": 6450
},
{
"epoch": 1.82,
"learning_rate": 3.9696250593260565e-06,
"loss": 0.4367,
"step": 6460
},
{
"epoch": 1.82,
"learning_rate": 3.960132890365449e-06,
"loss": 0.4518,
"step": 6470
},
{
"epoch": 1.83,
"learning_rate": 3.950640721404842e-06,
"loss": 0.1851,
"step": 6480
},
{
"epoch": 1.83,
"learning_rate": 3.941148552444234e-06,
"loss": 0.5702,
"step": 6490
},
{
"epoch": 1.83,
"learning_rate": 3.931656383483626e-06,
"loss": 0.5526,
"step": 6500
},
{
"epoch": 1.83,
"eval_loss": 0.34791234135627747,
"eval_runtime": 210.9344,
"eval_samples_per_second": 9.581,
"eval_steps_per_second": 2.399,
"step": 6500
},
{
"epoch": 1.84,
"learning_rate": 3.922164214523019e-06,
"loss": 0.2374,
"step": 6510
},
{
"epoch": 1.84,
"learning_rate": 3.912672045562411e-06,
"loss": 0.2343,
"step": 6520
},
{
"epoch": 1.84,
"learning_rate": 3.903179876601804e-06,
"loss": 0.3023,
"step": 6530
},
{
"epoch": 1.84,
"learning_rate": 3.893687707641196e-06,
"loss": 0.2294,
"step": 6540
},
{
"epoch": 1.85,
"learning_rate": 3.884195538680589e-06,
"loss": 0.2737,
"step": 6550
},
{
"epoch": 1.85,
"learning_rate": 3.874703369719982e-06,
"loss": 0.718,
"step": 6560
},
{
"epoch": 1.85,
"learning_rate": 3.865211200759374e-06,
"loss": 0.4965,
"step": 6570
},
{
"epoch": 1.86,
"learning_rate": 3.855719031798767e-06,
"loss": 0.5557,
"step": 6580
},
{
"epoch": 1.86,
"learning_rate": 3.8462268628381585e-06,
"loss": 0.3303,
"step": 6590
},
{
"epoch": 1.86,
"learning_rate": 3.836734693877551e-06,
"loss": 0.3357,
"step": 6600
},
{
"epoch": 1.86,
"learning_rate": 3.827242524916944e-06,
"loss": 0.1917,
"step": 6610
},
{
"epoch": 1.87,
"learning_rate": 3.817750355956336e-06,
"loss": 0.284,
"step": 6620
},
{
"epoch": 1.87,
"learning_rate": 3.8082581869957285e-06,
"loss": 0.4455,
"step": 6630
},
{
"epoch": 1.87,
"learning_rate": 3.7987660180351215e-06,
"loss": 0.2505,
"step": 6640
},
{
"epoch": 1.87,
"learning_rate": 3.789273849074514e-06,
"loss": 0.3804,
"step": 6650
},
{
"epoch": 1.88,
"learning_rate": 3.7797816801139063e-06,
"loss": 0.3043,
"step": 6660
},
{
"epoch": 1.88,
"learning_rate": 3.770289511153299e-06,
"loss": 0.1626,
"step": 6670
},
{
"epoch": 1.88,
"learning_rate": 3.7607973421926915e-06,
"loss": 0.2546,
"step": 6680
},
{
"epoch": 1.89,
"learning_rate": 3.7513051732320836e-06,
"loss": 0.4317,
"step": 6690
},
{
"epoch": 1.89,
"learning_rate": 3.7418130042714762e-06,
"loss": 0.3358,
"step": 6700
},
{
"epoch": 1.89,
"learning_rate": 3.732320835310869e-06,
"loss": 0.5867,
"step": 6710
},
{
"epoch": 1.89,
"learning_rate": 3.722828666350261e-06,
"loss": 0.6311,
"step": 6720
},
{
"epoch": 1.9,
"learning_rate": 3.713336497389654e-06,
"loss": 0.3264,
"step": 6730
},
{
"epoch": 1.9,
"learning_rate": 3.7038443284290466e-06,
"loss": 0.4179,
"step": 6740
},
{
"epoch": 1.9,
"learning_rate": 3.694352159468439e-06,
"loss": 0.372,
"step": 6750
},
{
"epoch": 1.91,
"learning_rate": 3.6848599905078314e-06,
"loss": 0.3102,
"step": 6760
},
{
"epoch": 1.91,
"learning_rate": 3.675367821547224e-06,
"loss": 0.494,
"step": 6770
},
{
"epoch": 1.91,
"learning_rate": 3.665875652586616e-06,
"loss": 0.2307,
"step": 6780
},
{
"epoch": 1.91,
"learning_rate": 3.6563834836260088e-06,
"loss": 0.4356,
"step": 6790
},
{
"epoch": 1.92,
"learning_rate": 3.646891314665401e-06,
"loss": 0.5974,
"step": 6800
},
{
"epoch": 1.92,
"learning_rate": 3.6373991457047935e-06,
"loss": 0.1553,
"step": 6810
},
{
"epoch": 1.92,
"learning_rate": 3.6279069767441866e-06,
"loss": 0.429,
"step": 6820
},
{
"epoch": 1.93,
"learning_rate": 3.618414807783579e-06,
"loss": 0.2103,
"step": 6830
},
{
"epoch": 1.93,
"learning_rate": 3.6089226388229713e-06,
"loss": 0.505,
"step": 6840
},
{
"epoch": 1.93,
"learning_rate": 3.599430469862364e-06,
"loss": 0.3072,
"step": 6850
},
{
"epoch": 1.93,
"learning_rate": 3.5899383009017565e-06,
"loss": 0.5361,
"step": 6860
},
{
"epoch": 1.94,
"learning_rate": 3.5804461319411487e-06,
"loss": 0.6273,
"step": 6870
},
{
"epoch": 1.94,
"learning_rate": 3.5709539629805413e-06,
"loss": 0.3035,
"step": 6880
},
{
"epoch": 1.94,
"learning_rate": 3.5614617940199335e-06,
"loss": 0.3123,
"step": 6890
},
{
"epoch": 1.95,
"learning_rate": 3.551969625059326e-06,
"loss": 0.4275,
"step": 6900
},
{
"epoch": 1.95,
"learning_rate": 3.542477456098719e-06,
"loss": 0.4825,
"step": 6910
},
{
"epoch": 1.95,
"learning_rate": 3.5329852871381117e-06,
"loss": 0.3058,
"step": 6920
},
{
"epoch": 1.95,
"learning_rate": 3.523493118177504e-06,
"loss": 0.4175,
"step": 6930
},
{
"epoch": 1.96,
"learning_rate": 3.5140009492168964e-06,
"loss": 0.3237,
"step": 6940
},
{
"epoch": 1.96,
"learning_rate": 3.5045087802562886e-06,
"loss": 0.5556,
"step": 6950
},
{
"epoch": 1.96,
"learning_rate": 3.495016611295681e-06,
"loss": 0.2021,
"step": 6960
},
{
"epoch": 1.97,
"learning_rate": 3.485524442335074e-06,
"loss": 0.4329,
"step": 6970
},
{
"epoch": 1.97,
"learning_rate": 3.476032273374466e-06,
"loss": 0.465,
"step": 6980
},
{
"epoch": 1.97,
"learning_rate": 3.4665401044138586e-06,
"loss": 0.4829,
"step": 6990
},
{
"epoch": 1.97,
"learning_rate": 3.4570479354532516e-06,
"loss": 0.3314,
"step": 7000
},
{
"epoch": 1.97,
"eval_loss": 0.3463754951953888,
"eval_runtime": 211.4262,
"eval_samples_per_second": 9.559,
"eval_steps_per_second": 2.393,
"step": 7000
},
{
"epoch": 1.98,
"learning_rate": 3.447555766492644e-06,
"loss": 0.3652,
"step": 7010
},
{
"epoch": 1.98,
"learning_rate": 3.4380635975320364e-06,
"loss": 0.3147,
"step": 7020
},
{
"epoch": 1.98,
"learning_rate": 3.428571428571429e-06,
"loss": 0.1345,
"step": 7030
},
{
"epoch": 1.98,
"learning_rate": 3.419079259610821e-06,
"loss": 0.361,
"step": 7040
},
{
"epoch": 1.99,
"learning_rate": 3.4095870906502137e-06,
"loss": 0.5192,
"step": 7050
},
{
"epoch": 1.99,
"learning_rate": 3.4000949216896063e-06,
"loss": 0.37,
"step": 7060
},
{
"epoch": 1.99,
"learning_rate": 3.3906027527289985e-06,
"loss": 0.4377,
"step": 7070
},
{
"epoch": 2.0,
"learning_rate": 3.3811105837683915e-06,
"loss": 0.5498,
"step": 7080
},
{
"epoch": 2.0,
"learning_rate": 3.371618414807784e-06,
"loss": 0.2211,
"step": 7090
},
{
"epoch": 2.0,
"learning_rate": 3.3621262458471767e-06,
"loss": 0.5511,
"step": 7100
},
{
"epoch": 2.0,
"learning_rate": 3.352634076886569e-06,
"loss": 0.4069,
"step": 7110
},
{
"epoch": 2.01,
"learning_rate": 3.3431419079259615e-06,
"loss": 0.51,
"step": 7120
},
{
"epoch": 2.01,
"learning_rate": 3.3336497389653537e-06,
"loss": 0.3028,
"step": 7130
},
{
"epoch": 2.01,
"learning_rate": 3.3241575700047463e-06,
"loss": 0.2983,
"step": 7140
},
{
"epoch": 2.02,
"learning_rate": 3.314665401044139e-06,
"loss": 0.7088,
"step": 7150
},
{
"epoch": 2.02,
"learning_rate": 3.305173232083531e-06,
"loss": 0.3242,
"step": 7160
},
{
"epoch": 2.02,
"learning_rate": 3.295681063122924e-06,
"loss": 0.3656,
"step": 7170
},
{
"epoch": 2.02,
"learning_rate": 3.2861888941623166e-06,
"loss": 0.5446,
"step": 7180
},
{
"epoch": 2.03,
"learning_rate": 3.276696725201709e-06,
"loss": 0.1479,
"step": 7190
},
{
"epoch": 2.03,
"learning_rate": 3.2672045562411014e-06,
"loss": 0.2724,
"step": 7200
},
{
"epoch": 2.03,
"learning_rate": 3.257712387280494e-06,
"loss": 0.4428,
"step": 7210
},
{
"epoch": 2.04,
"learning_rate": 3.248220218319886e-06,
"loss": 0.374,
"step": 7220
},
{
"epoch": 2.04,
"learning_rate": 3.2387280493592788e-06,
"loss": 0.4587,
"step": 7230
},
{
"epoch": 2.04,
"learning_rate": 3.2292358803986714e-06,
"loss": 0.2607,
"step": 7240
},
{
"epoch": 2.04,
"learning_rate": 3.2197437114380635e-06,
"loss": 0.31,
"step": 7250
},
{
"epoch": 2.05,
"learning_rate": 3.2102515424774566e-06,
"loss": 0.5392,
"step": 7260
},
{
"epoch": 2.05,
"learning_rate": 3.200759373516849e-06,
"loss": 0.3685,
"step": 7270
},
{
"epoch": 2.05,
"learning_rate": 3.1912672045562413e-06,
"loss": 0.6325,
"step": 7280
},
{
"epoch": 2.06,
"learning_rate": 3.181775035595634e-06,
"loss": 0.4223,
"step": 7290
},
{
"epoch": 2.06,
"learning_rate": 3.1722828666350265e-06,
"loss": 0.3727,
"step": 7300
},
{
"epoch": 2.06,
"learning_rate": 3.1627906976744187e-06,
"loss": 0.4869,
"step": 7310
},
{
"epoch": 2.06,
"learning_rate": 3.1532985287138113e-06,
"loss": 0.2286,
"step": 7320
},
{
"epoch": 2.07,
"learning_rate": 3.143806359753204e-06,
"loss": 0.4144,
"step": 7330
},
{
"epoch": 2.07,
"learning_rate": 3.134314190792596e-06,
"loss": 0.452,
"step": 7340
},
{
"epoch": 2.07,
"learning_rate": 3.124822021831989e-06,
"loss": 0.3295,
"step": 7350
},
{
"epoch": 2.07,
"learning_rate": 3.1153298528713817e-06,
"loss": 0.2194,
"step": 7360
},
{
"epoch": 2.08,
"learning_rate": 3.105837683910774e-06,
"loss": 0.2943,
"step": 7370
},
{
"epoch": 2.08,
"learning_rate": 3.0963455149501664e-06,
"loss": 0.4255,
"step": 7380
},
{
"epoch": 2.08,
"learning_rate": 3.086853345989559e-06,
"loss": 0.2137,
"step": 7390
},
{
"epoch": 2.09,
"learning_rate": 3.0773611770289512e-06,
"loss": 0.3923,
"step": 7400
},
{
"epoch": 2.09,
"learning_rate": 3.067869008068344e-06,
"loss": 0.3551,
"step": 7410
},
{
"epoch": 2.09,
"learning_rate": 3.058376839107736e-06,
"loss": 0.3381,
"step": 7420
},
{
"epoch": 2.09,
"learning_rate": 3.0488846701471286e-06,
"loss": 0.2882,
"step": 7430
},
{
"epoch": 2.1,
"learning_rate": 3.0393925011865216e-06,
"loss": 0.3432,
"step": 7440
},
{
"epoch": 2.1,
"learning_rate": 3.029900332225914e-06,
"loss": 0.5384,
"step": 7450
},
{
"epoch": 2.1,
"learning_rate": 3.0204081632653064e-06,
"loss": 0.3176,
"step": 7460
},
{
"epoch": 2.11,
"learning_rate": 3.010915994304699e-06,
"loss": 0.6058,
"step": 7470
},
{
"epoch": 2.11,
"learning_rate": 3.0014238253440916e-06,
"loss": 0.38,
"step": 7480
},
{
"epoch": 2.11,
"learning_rate": 2.9919316563834837e-06,
"loss": 0.3803,
"step": 7490
},
{
"epoch": 2.11,
"learning_rate": 2.9824394874228763e-06,
"loss": 0.4602,
"step": 7500
},
{
"epoch": 2.11,
"eval_loss": 0.3481377065181732,
"eval_runtime": 211.3751,
"eval_samples_per_second": 9.561,
"eval_steps_per_second": 2.394,
"step": 7500
},
{
"epoch": 2.12,
"learning_rate": 2.9729473184622685e-06,
"loss": 0.4522,
"step": 7510
},
{
"epoch": 2.12,
"learning_rate": 2.963455149501661e-06,
"loss": 0.227,
"step": 7520
},
{
"epoch": 2.12,
"learning_rate": 2.953962980541054e-06,
"loss": 0.4448,
"step": 7530
},
{
"epoch": 2.13,
"learning_rate": 2.9444708115804467e-06,
"loss": 0.538,
"step": 7540
},
{
"epoch": 2.13,
"learning_rate": 2.934978642619839e-06,
"loss": 0.3633,
"step": 7550
},
{
"epoch": 2.13,
"learning_rate": 2.9254864736592315e-06,
"loss": 0.2754,
"step": 7560
},
{
"epoch": 2.13,
"learning_rate": 2.915994304698624e-06,
"loss": 0.5962,
"step": 7570
},
{
"epoch": 2.14,
"learning_rate": 2.9065021357380163e-06,
"loss": 0.2812,
"step": 7580
},
{
"epoch": 2.14,
"learning_rate": 2.897009966777409e-06,
"loss": 0.5014,
"step": 7590
},
{
"epoch": 2.14,
"learning_rate": 2.887517797816801e-06,
"loss": 0.271,
"step": 7600
},
{
"epoch": 2.15,
"learning_rate": 2.8780256288561936e-06,
"loss": 0.2206,
"step": 7610
},
{
"epoch": 2.15,
"learning_rate": 2.8685334598955866e-06,
"loss": 0.3344,
"step": 7620
},
{
"epoch": 2.15,
"learning_rate": 2.8590412909349792e-06,
"loss": 0.4583,
"step": 7630
},
{
"epoch": 2.15,
"learning_rate": 2.8495491219743714e-06,
"loss": 0.4646,
"step": 7640
},
{
"epoch": 2.16,
"learning_rate": 2.840056953013764e-06,
"loss": 0.2786,
"step": 7650
},
{
"epoch": 2.16,
"learning_rate": 2.830564784053156e-06,
"loss": 0.2995,
"step": 7660
},
{
"epoch": 2.16,
"learning_rate": 2.8210726150925488e-06,
"loss": 0.2673,
"step": 7670
},
{
"epoch": 2.17,
"learning_rate": 2.8115804461319414e-06,
"loss": 0.3,
"step": 7680
},
{
"epoch": 2.17,
"learning_rate": 2.8020882771713336e-06,
"loss": 0.4938,
"step": 7690
},
{
"epoch": 2.17,
"learning_rate": 2.792596108210726e-06,
"loss": 0.5341,
"step": 7700
},
{
"epoch": 2.17,
"learning_rate": 2.783103939250119e-06,
"loss": 0.4789,
"step": 7710
},
{
"epoch": 2.18,
"learning_rate": 2.7736117702895118e-06,
"loss": 0.203,
"step": 7720
},
{
"epoch": 2.18,
"learning_rate": 2.764119601328904e-06,
"loss": 0.2085,
"step": 7730
},
{
"epoch": 2.18,
"learning_rate": 2.7546274323682965e-06,
"loss": 0.35,
"step": 7740
},
{
"epoch": 2.18,
"learning_rate": 2.7451352634076887e-06,
"loss": 0.5455,
"step": 7750
},
{
"epoch": 2.19,
"learning_rate": 2.7356430944470813e-06,
"loss": 0.6806,
"step": 7760
},
{
"epoch": 2.19,
"learning_rate": 2.726150925486474e-06,
"loss": 0.3908,
"step": 7770
},
{
"epoch": 2.19,
"learning_rate": 2.716658756525866e-06,
"loss": 0.3496,
"step": 7780
},
{
"epoch": 2.2,
"learning_rate": 2.7071665875652587e-06,
"loss": 0.3023,
"step": 7790
},
{
"epoch": 2.2,
"learning_rate": 2.6976744186046517e-06,
"loss": 0.2246,
"step": 7800
},
{
"epoch": 2.2,
"learning_rate": 2.688182249644044e-06,
"loss": 0.325,
"step": 7810
},
{
"epoch": 2.2,
"learning_rate": 2.6786900806834365e-06,
"loss": 0.344,
"step": 7820
},
{
"epoch": 2.21,
"learning_rate": 2.669197911722829e-06,
"loss": 0.496,
"step": 7830
},
{
"epoch": 2.21,
"learning_rate": 2.6597057427622212e-06,
"loss": 0.1879,
"step": 7840
},
{
"epoch": 2.21,
"learning_rate": 2.650213573801614e-06,
"loss": 0.3855,
"step": 7850
},
{
"epoch": 2.22,
"learning_rate": 2.6407214048410064e-06,
"loss": 0.5485,
"step": 7860
},
{
"epoch": 2.22,
"learning_rate": 2.6312292358803986e-06,
"loss": 0.2999,
"step": 7870
},
{
"epoch": 2.22,
"learning_rate": 2.621737066919791e-06,
"loss": 0.2614,
"step": 7880
},
{
"epoch": 2.22,
"learning_rate": 2.6122448979591842e-06,
"loss": 0.615,
"step": 7890
},
{
"epoch": 2.23,
"learning_rate": 2.6027527289985764e-06,
"loss": 0.4784,
"step": 7900
},
{
"epoch": 2.23,
"learning_rate": 2.593260560037969e-06,
"loss": 0.3825,
"step": 7910
},
{
"epoch": 2.23,
"learning_rate": 2.5837683910773616e-06,
"loss": 0.2182,
"step": 7920
},
{
"epoch": 2.24,
"learning_rate": 2.5742762221167538e-06,
"loss": 0.336,
"step": 7930
},
{
"epoch": 2.24,
"learning_rate": 2.5647840531561463e-06,
"loss": 0.3651,
"step": 7940
},
{
"epoch": 2.24,
"learning_rate": 2.555291884195539e-06,
"loss": 0.2512,
"step": 7950
},
{
"epoch": 2.24,
"learning_rate": 2.545799715234931e-06,
"loss": 0.4523,
"step": 7960
},
{
"epoch": 2.25,
"learning_rate": 2.5363075462743237e-06,
"loss": 0.3506,
"step": 7970
},
{
"epoch": 2.25,
"learning_rate": 2.5268153773137167e-06,
"loss": 0.3505,
"step": 7980
},
{
"epoch": 2.25,
"learning_rate": 2.517323208353109e-06,
"loss": 0.533,
"step": 7990
},
{
"epoch": 2.26,
"learning_rate": 2.5078310393925015e-06,
"loss": 0.4401,
"step": 8000
},
{
"epoch": 2.26,
"eval_loss": 0.34744471311569214,
"eval_runtime": 211.4984,
"eval_samples_per_second": 9.556,
"eval_steps_per_second": 2.392,
"step": 8000
},
{
"epoch": 2.26,
"learning_rate": 2.498338870431894e-06,
"loss": 0.3372,
"step": 8010
},
{
"epoch": 2.26,
"learning_rate": 2.4888467014712863e-06,
"loss": 0.3914,
"step": 8020
},
{
"epoch": 2.26,
"learning_rate": 2.479354532510679e-06,
"loss": 0.4832,
"step": 8030
},
{
"epoch": 2.27,
"learning_rate": 2.4698623635500715e-06,
"loss": 0.3643,
"step": 8040
},
{
"epoch": 2.27,
"learning_rate": 2.460370194589464e-06,
"loss": 0.3191,
"step": 8050
},
{
"epoch": 2.27,
"learning_rate": 2.4508780256288562e-06,
"loss": 0.4392,
"step": 8060
},
{
"epoch": 2.28,
"learning_rate": 2.441385856668249e-06,
"loss": 0.2072,
"step": 8070
},
{
"epoch": 2.28,
"learning_rate": 2.4318936877076414e-06,
"loss": 0.193,
"step": 8080
},
{
"epoch": 2.28,
"learning_rate": 2.422401518747034e-06,
"loss": 0.3453,
"step": 8090
},
{
"epoch": 2.28,
"learning_rate": 2.4129093497864266e-06,
"loss": 0.3169,
"step": 8100
},
{
"epoch": 2.29,
"learning_rate": 2.403417180825819e-06,
"loss": 0.3616,
"step": 8110
},
{
"epoch": 2.29,
"learning_rate": 2.3939250118652114e-06,
"loss": 0.2884,
"step": 8120
},
{
"epoch": 2.29,
"learning_rate": 2.384432842904604e-06,
"loss": 0.5743,
"step": 8130
},
{
"epoch": 2.29,
"learning_rate": 2.3749406739439966e-06,
"loss": 0.3009,
"step": 8140
},
{
"epoch": 2.3,
"learning_rate": 2.3654485049833888e-06,
"loss": 0.4973,
"step": 8150
},
{
"epoch": 2.3,
"learning_rate": 2.3559563360227814e-06,
"loss": 0.3357,
"step": 8160
},
{
"epoch": 2.3,
"learning_rate": 2.346464167062174e-06,
"loss": 0.3922,
"step": 8170
},
{
"epoch": 2.31,
"learning_rate": 2.3369719981015665e-06,
"loss": 0.3381,
"step": 8180
},
{
"epoch": 2.31,
"learning_rate": 2.327479829140959e-06,
"loss": 0.2991,
"step": 8190
},
{
"epoch": 2.31,
"learning_rate": 2.3179876601803513e-06,
"loss": 0.4372,
"step": 8200
},
{
"epoch": 2.31,
"learning_rate": 2.308495491219744e-06,
"loss": 0.2993,
"step": 8210
},
{
"epoch": 2.32,
"learning_rate": 2.2990033222591365e-06,
"loss": 0.3867,
"step": 8220
},
{
"epoch": 2.32,
"learning_rate": 2.289511153298529e-06,
"loss": 0.4566,
"step": 8230
},
{
"epoch": 2.32,
"learning_rate": 2.2800189843379213e-06,
"loss": 0.2101,
"step": 8240
},
{
"epoch": 2.33,
"learning_rate": 2.270526815377314e-06,
"loss": 0.2875,
"step": 8250
},
{
"epoch": 2.33,
"learning_rate": 2.2610346464167065e-06,
"loss": 0.4097,
"step": 8260
},
{
"epoch": 2.33,
"learning_rate": 2.251542477456099e-06,
"loss": 0.4438,
"step": 8270
},
{
"epoch": 2.33,
"learning_rate": 2.2420503084954912e-06,
"loss": 0.4007,
"step": 8280
},
{
"epoch": 2.34,
"learning_rate": 2.232558139534884e-06,
"loss": 0.3578,
"step": 8290
},
{
"epoch": 2.34,
"learning_rate": 2.2230659705742764e-06,
"loss": 0.4336,
"step": 8300
},
{
"epoch": 2.34,
"learning_rate": 2.213573801613669e-06,
"loss": 0.6333,
"step": 8310
},
{
"epoch": 2.35,
"learning_rate": 2.2040816326530616e-06,
"loss": 0.1238,
"step": 8320
},
{
"epoch": 2.35,
"learning_rate": 2.194589463692454e-06,
"loss": 0.5357,
"step": 8330
},
{
"epoch": 2.35,
"learning_rate": 2.1850972947318464e-06,
"loss": 0.2623,
"step": 8340
},
{
"epoch": 2.35,
"learning_rate": 2.1756051257712386e-06,
"loss": 0.2794,
"step": 8350
},
{
"epoch": 2.36,
"learning_rate": 2.1661129568106316e-06,
"loss": 0.489,
"step": 8360
},
{
"epoch": 2.36,
"learning_rate": 2.1566207878500238e-06,
"loss": 0.3155,
"step": 8370
},
{
"epoch": 2.36,
"learning_rate": 2.1471286188894164e-06,
"loss": 0.2944,
"step": 8380
},
{
"epoch": 2.37,
"learning_rate": 2.137636449928809e-06,
"loss": 0.4045,
"step": 8390
},
{
"epoch": 2.37,
"learning_rate": 2.1281442809682016e-06,
"loss": 0.3847,
"step": 8400
},
{
"epoch": 2.37,
"learning_rate": 2.118652112007594e-06,
"loss": 0.2581,
"step": 8410
},
{
"epoch": 2.37,
"learning_rate": 2.1091599430469863e-06,
"loss": 0.1977,
"step": 8420
},
{
"epoch": 2.38,
"learning_rate": 2.099667774086379e-06,
"loss": 0.1456,
"step": 8430
},
{
"epoch": 2.38,
"learning_rate": 2.090175605125771e-06,
"loss": 0.3576,
"step": 8440
},
{
"epoch": 2.38,
"learning_rate": 2.080683436165164e-06,
"loss": 0.1775,
"step": 8450
},
{
"epoch": 2.39,
"learning_rate": 2.0711912672045563e-06,
"loss": 0.153,
"step": 8460
},
{
"epoch": 2.39,
"learning_rate": 2.061699098243949e-06,
"loss": 0.4822,
"step": 8470
},
{
"epoch": 2.39,
"learning_rate": 2.0522069292833415e-06,
"loss": 0.3706,
"step": 8480
},
{
"epoch": 2.39,
"learning_rate": 2.042714760322734e-06,
"loss": 0.5487,
"step": 8490
},
{
"epoch": 2.4,
"learning_rate": 2.0332225913621267e-06,
"loss": 0.1947,
"step": 8500
},
{
"epoch": 2.4,
"eval_loss": 0.3472154438495636,
"eval_runtime": 210.133,
"eval_samples_per_second": 9.618,
"eval_steps_per_second": 2.408,
"step": 8500
},
{
"epoch": 2.4,
"learning_rate": 2.023730422401519e-06,
"loss": 0.3501,
"step": 8510
},
{
"epoch": 2.4,
"learning_rate": 2.0142382534409114e-06,
"loss": 0.3572,
"step": 8520
},
{
"epoch": 2.4,
"learning_rate": 2.0047460844803036e-06,
"loss": 0.4292,
"step": 8530
},
{
"epoch": 2.41,
"learning_rate": 1.9952539155196966e-06,
"loss": 0.202,
"step": 8540
},
{
"epoch": 2.41,
"learning_rate": 1.985761746559089e-06,
"loss": 0.5637,
"step": 8550
},
{
"epoch": 2.41,
"learning_rate": 1.9762695775984814e-06,
"loss": 0.5057,
"step": 8560
},
{
"epoch": 2.42,
"learning_rate": 1.966777408637874e-06,
"loss": 0.2232,
"step": 8570
},
{
"epoch": 2.42,
"learning_rate": 1.9572852396772666e-06,
"loss": 0.3422,
"step": 8580
},
{
"epoch": 2.42,
"learning_rate": 1.9477930707166588e-06,
"loss": 0.282,
"step": 8590
},
{
"epoch": 2.42,
"learning_rate": 1.9383009017560514e-06,
"loss": 0.6693,
"step": 8600
},
{
"epoch": 2.43,
"learning_rate": 1.928808732795444e-06,
"loss": 0.3485,
"step": 8610
},
{
"epoch": 2.43,
"learning_rate": 1.9193165638348366e-06,
"loss": 0.5767,
"step": 8620
},
{
"epoch": 2.43,
"learning_rate": 1.909824394874229e-06,
"loss": 0.3807,
"step": 8630
},
{
"epoch": 2.44,
"learning_rate": 1.9003322259136213e-06,
"loss": 0.2077,
"step": 8640
},
{
"epoch": 2.44,
"learning_rate": 1.890840056953014e-06,
"loss": 0.2414,
"step": 8650
},
{
"epoch": 2.44,
"learning_rate": 1.8813478879924063e-06,
"loss": 0.3187,
"step": 8660
},
{
"epoch": 2.44,
"learning_rate": 1.8718557190317991e-06,
"loss": 0.5724,
"step": 8670
},
{
"epoch": 2.45,
"learning_rate": 1.8623635500711915e-06,
"loss": 0.55,
"step": 8680
},
{
"epoch": 2.45,
"learning_rate": 1.8528713811105839e-06,
"loss": 0.386,
"step": 8690
},
{
"epoch": 2.45,
"learning_rate": 1.8433792121499763e-06,
"loss": 0.1704,
"step": 8700
},
{
"epoch": 2.46,
"learning_rate": 1.833887043189369e-06,
"loss": 0.5093,
"step": 8710
},
{
"epoch": 2.46,
"learning_rate": 1.8243948742287615e-06,
"loss": 0.2857,
"step": 8720
},
{
"epoch": 2.46,
"learning_rate": 1.8149027052681538e-06,
"loss": 0.4402,
"step": 8730
},
{
"epoch": 2.46,
"learning_rate": 1.8054105363075464e-06,
"loss": 0.3786,
"step": 8740
},
{
"epoch": 2.47,
"learning_rate": 1.7959183673469388e-06,
"loss": 0.443,
"step": 8750
},
{
"epoch": 2.47,
"learning_rate": 1.7864261983863314e-06,
"loss": 0.1684,
"step": 8760
},
{
"epoch": 2.47,
"learning_rate": 1.776934029425724e-06,
"loss": 0.4989,
"step": 8770
},
{
"epoch": 2.48,
"learning_rate": 1.7674418604651164e-06,
"loss": 0.3821,
"step": 8780
},
{
"epoch": 2.48,
"learning_rate": 1.7579496915045088e-06,
"loss": 0.3299,
"step": 8790
},
{
"epoch": 2.48,
"learning_rate": 1.7484575225439016e-06,
"loss": 0.4878,
"step": 8800
},
{
"epoch": 2.48,
"learning_rate": 1.738965353583294e-06,
"loss": 0.3214,
"step": 8810
},
{
"epoch": 2.49,
"learning_rate": 1.7294731846226864e-06,
"loss": 0.3999,
"step": 8820
},
{
"epoch": 2.49,
"learning_rate": 1.719981015662079e-06,
"loss": 0.2993,
"step": 8830
},
{
"epoch": 2.49,
"learning_rate": 1.7104888467014713e-06,
"loss": 0.415,
"step": 8840
},
{
"epoch": 2.5,
"learning_rate": 1.700996677740864e-06,
"loss": 0.29,
"step": 8850
},
{
"epoch": 2.5,
"learning_rate": 1.6915045087802565e-06,
"loss": 0.3722,
"step": 8860
},
{
"epoch": 2.5,
"learning_rate": 1.682012339819649e-06,
"loss": 0.3986,
"step": 8870
},
{
"epoch": 2.5,
"learning_rate": 1.6725201708590413e-06,
"loss": 0.2658,
"step": 8880
},
{
"epoch": 2.51,
"learning_rate": 1.6630280018984341e-06,
"loss": 0.4891,
"step": 8890
},
{
"epoch": 2.51,
"learning_rate": 1.6535358329378265e-06,
"loss": 0.3952,
"step": 8900
},
{
"epoch": 2.51,
"learning_rate": 1.6440436639772189e-06,
"loss": 0.1337,
"step": 8910
},
{
"epoch": 2.51,
"learning_rate": 1.6345514950166113e-06,
"loss": 0.358,
"step": 8920
},
{
"epoch": 2.52,
"learning_rate": 1.6250593260560039e-06,
"loss": 0.4691,
"step": 8930
},
{
"epoch": 2.52,
"learning_rate": 1.6155671570953965e-06,
"loss": 0.3327,
"step": 8940
},
{
"epoch": 2.52,
"learning_rate": 1.606074988134789e-06,
"loss": 0.5424,
"step": 8950
},
{
"epoch": 2.53,
"learning_rate": 1.5965828191741814e-06,
"loss": 0.4439,
"step": 8960
},
{
"epoch": 2.53,
"learning_rate": 1.5870906502135738e-06,
"loss": 0.7304,
"step": 8970
},
{
"epoch": 2.53,
"learning_rate": 1.5775984812529666e-06,
"loss": 0.2145,
"step": 8980
},
{
"epoch": 2.53,
"learning_rate": 1.568106312292359e-06,
"loss": 0.33,
"step": 8990
},
{
"epoch": 2.54,
"learning_rate": 1.5586141433317514e-06,
"loss": 0.4144,
"step": 9000
},
{
"epoch": 2.54,
"eval_loss": 0.3469783067703247,
"eval_runtime": 210.9536,
"eval_samples_per_second": 9.58,
"eval_steps_per_second": 2.399,
"step": 9000
},
{
"epoch": 2.54,
"learning_rate": 1.5491219743711438e-06,
"loss": 0.4365,
"step": 9010
},
{
"epoch": 2.54,
"learning_rate": 1.5396298054105364e-06,
"loss": 0.306,
"step": 9020
},
{
"epoch": 2.55,
"learning_rate": 1.530137636449929e-06,
"loss": 0.5146,
"step": 9030
},
{
"epoch": 2.55,
"learning_rate": 1.5206454674893214e-06,
"loss": 0.2689,
"step": 9040
},
{
"epoch": 2.55,
"learning_rate": 1.511153298528714e-06,
"loss": 0.2034,
"step": 9050
},
{
"epoch": 2.55,
"learning_rate": 1.5016611295681064e-06,
"loss": 0.4136,
"step": 9060
},
{
"epoch": 2.56,
"learning_rate": 1.492168960607499e-06,
"loss": 0.2729,
"step": 9070
},
{
"epoch": 2.56,
"learning_rate": 1.4826767916468915e-06,
"loss": 0.6817,
"step": 9080
},
{
"epoch": 2.56,
"learning_rate": 1.473184622686284e-06,
"loss": 0.3407,
"step": 9090
},
{
"epoch": 2.57,
"learning_rate": 1.4636924537256763e-06,
"loss": 0.4404,
"step": 9100
},
{
"epoch": 2.57,
"learning_rate": 1.4542002847650687e-06,
"loss": 0.4365,
"step": 9110
},
{
"epoch": 2.57,
"learning_rate": 1.4447081158044615e-06,
"loss": 0.318,
"step": 9120
},
{
"epoch": 2.57,
"learning_rate": 1.435215946843854e-06,
"loss": 0.7077,
"step": 9130
},
{
"epoch": 2.58,
"learning_rate": 1.4257237778832465e-06,
"loss": 0.4964,
"step": 9140
},
{
"epoch": 2.58,
"learning_rate": 1.4162316089226389e-06,
"loss": 0.5746,
"step": 9150
},
{
"epoch": 2.58,
"learning_rate": 1.4067394399620315e-06,
"loss": 0.349,
"step": 9160
},
{
"epoch": 2.59,
"learning_rate": 1.397247271001424e-06,
"loss": 0.4112,
"step": 9170
},
{
"epoch": 2.59,
"learning_rate": 1.3877551020408165e-06,
"loss": 0.3403,
"step": 9180
},
{
"epoch": 2.59,
"learning_rate": 1.3782629330802088e-06,
"loss": 0.4245,
"step": 9190
},
{
"epoch": 2.59,
"learning_rate": 1.3687707641196012e-06,
"loss": 0.473,
"step": 9200
},
{
"epoch": 2.6,
"learning_rate": 1.359278595158994e-06,
"loss": 0.8119,
"step": 9210
},
{
"epoch": 2.6,
"learning_rate": 1.3497864261983864e-06,
"loss": 0.435,
"step": 9220
},
{
"epoch": 2.6,
"learning_rate": 1.3402942572377788e-06,
"loss": 0.2139,
"step": 9230
},
{
"epoch": 2.61,
"learning_rate": 1.3308020882771714e-06,
"loss": 0.4287,
"step": 9240
},
{
"epoch": 2.61,
"learning_rate": 1.321309919316564e-06,
"loss": 0.2869,
"step": 9250
},
{
"epoch": 2.61,
"learning_rate": 1.3118177503559566e-06,
"loss": 0.1564,
"step": 9260
},
{
"epoch": 2.61,
"learning_rate": 1.302325581395349e-06,
"loss": 0.4634,
"step": 9270
},
{
"epoch": 2.62,
"learning_rate": 1.2928334124347414e-06,
"loss": 0.1519,
"step": 9280
},
{
"epoch": 2.62,
"learning_rate": 1.2833412434741342e-06,
"loss": 0.3904,
"step": 9290
},
{
"epoch": 2.62,
"learning_rate": 1.2738490745135266e-06,
"loss": 0.227,
"step": 9300
},
{
"epoch": 2.62,
"learning_rate": 1.264356905552919e-06,
"loss": 0.5029,
"step": 9310
},
{
"epoch": 2.63,
"learning_rate": 1.2548647365923113e-06,
"loss": 0.4979,
"step": 9320
},
{
"epoch": 2.63,
"learning_rate": 1.245372567631704e-06,
"loss": 0.4358,
"step": 9330
},
{
"epoch": 2.63,
"learning_rate": 1.2358803986710965e-06,
"loss": 0.304,
"step": 9340
},
{
"epoch": 2.64,
"learning_rate": 1.226388229710489e-06,
"loss": 0.6384,
"step": 9350
},
{
"epoch": 2.64,
"learning_rate": 1.2168960607498815e-06,
"loss": 0.1619,
"step": 9360
},
{
"epoch": 2.64,
"learning_rate": 1.207403891789274e-06,
"loss": 0.2769,
"step": 9370
},
{
"epoch": 2.64,
"learning_rate": 1.1979117228286665e-06,
"loss": 0.3026,
"step": 9380
},
{
"epoch": 2.65,
"learning_rate": 1.188419553868059e-06,
"loss": 0.3886,
"step": 9390
},
{
"epoch": 2.65,
"learning_rate": 1.1789273849074515e-06,
"loss": 0.1441,
"step": 9400
},
{
"epoch": 2.65,
"learning_rate": 1.1694352159468438e-06,
"loss": 0.3779,
"step": 9410
},
{
"epoch": 2.66,
"learning_rate": 1.1599430469862364e-06,
"loss": 0.3901,
"step": 9420
},
{
"epoch": 2.66,
"learning_rate": 1.1504508780256288e-06,
"loss": 0.2091,
"step": 9430
},
{
"epoch": 2.66,
"learning_rate": 1.1409587090650214e-06,
"loss": 0.3605,
"step": 9440
},
{
"epoch": 2.66,
"learning_rate": 1.131466540104414e-06,
"loss": 0.416,
"step": 9450
},
{
"epoch": 2.67,
"learning_rate": 1.1219743711438064e-06,
"loss": 0.2775,
"step": 9460
},
{
"epoch": 2.67,
"learning_rate": 1.112482202183199e-06,
"loss": 0.4789,
"step": 9470
},
{
"epoch": 2.67,
"learning_rate": 1.1029900332225916e-06,
"loss": 0.4778,
"step": 9480
},
{
"epoch": 2.68,
"learning_rate": 1.093497864261984e-06,
"loss": 0.3076,
"step": 9490
},
{
"epoch": 2.68,
"learning_rate": 1.0840056953013764e-06,
"loss": 0.3726,
"step": 9500
},
{
"epoch": 2.68,
"eval_loss": 0.34603193402290344,
"eval_runtime": 209.9907,
"eval_samples_per_second": 9.624,
"eval_steps_per_second": 2.41,
"step": 9500
},
{
"epoch": 2.68,
"learning_rate": 1.074513526340769e-06,
"loss": 0.4906,
"step": 9510
},
{
"epoch": 2.68,
"learning_rate": 1.0650213573801613e-06,
"loss": 0.3415,
"step": 9520
},
{
"epoch": 2.69,
"learning_rate": 1.055529188419554e-06,
"loss": 0.2651,
"step": 9530
},
{
"epoch": 2.69,
"learning_rate": 1.0460370194589463e-06,
"loss": 0.3119,
"step": 9540
},
{
"epoch": 2.69,
"learning_rate": 1.036544850498339e-06,
"loss": 0.3054,
"step": 9550
},
{
"epoch": 2.7,
"learning_rate": 1.0270526815377315e-06,
"loss": 0.3226,
"step": 9560
},
{
"epoch": 2.7,
"learning_rate": 1.0175605125771241e-06,
"loss": 0.5005,
"step": 9570
},
{
"epoch": 2.7,
"learning_rate": 1.0080683436165165e-06,
"loss": 0.2753,
"step": 9580
},
{
"epoch": 2.7,
"learning_rate": 9.985761746559089e-07,
"loss": 0.2687,
"step": 9590
},
{
"epoch": 2.71,
"learning_rate": 9.890840056953015e-07,
"loss": 0.3084,
"step": 9600
},
{
"epoch": 2.71,
"learning_rate": 9.795918367346939e-07,
"loss": 0.3602,
"step": 9610
},
{
"epoch": 2.71,
"learning_rate": 9.700996677740865e-07,
"loss": 0.3026,
"step": 9620
},
{
"epoch": 2.71,
"learning_rate": 9.606074988134788e-07,
"loss": 0.5057,
"step": 9630
},
{
"epoch": 2.72,
"learning_rate": 9.511153298528716e-07,
"loss": 0.3735,
"step": 9640
},
{
"epoch": 2.72,
"learning_rate": 9.416231608922639e-07,
"loss": 0.5268,
"step": 9650
},
{
"epoch": 2.72,
"learning_rate": 9.321309919316565e-07,
"loss": 0.4173,
"step": 9660
},
{
"epoch": 2.73,
"learning_rate": 9.226388229710489e-07,
"loss": 0.3529,
"step": 9670
},
{
"epoch": 2.73,
"learning_rate": 9.131466540104414e-07,
"loss": 0.3961,
"step": 9680
},
{
"epoch": 2.73,
"learning_rate": 9.03654485049834e-07,
"loss": 0.4331,
"step": 9690
},
{
"epoch": 2.73,
"learning_rate": 8.941623160892264e-07,
"loss": 0.3743,
"step": 9700
},
{
"epoch": 2.74,
"learning_rate": 8.84670147128619e-07,
"loss": 0.2121,
"step": 9710
},
{
"epoch": 2.74,
"learning_rate": 8.751779781680114e-07,
"loss": 0.4233,
"step": 9720
},
{
"epoch": 2.74,
"learning_rate": 8.65685809207404e-07,
"loss": 0.3716,
"step": 9730
},
{
"epoch": 2.75,
"learning_rate": 8.561936402467965e-07,
"loss": 0.275,
"step": 9740
},
{
"epoch": 2.75,
"learning_rate": 8.467014712861891e-07,
"loss": 0.2921,
"step": 9750
},
{
"epoch": 2.75,
"learning_rate": 8.372093023255814e-07,
"loss": 0.3295,
"step": 9760
},
{
"epoch": 2.75,
"learning_rate": 8.27717133364974e-07,
"loss": 0.4326,
"step": 9770
},
{
"epoch": 2.76,
"learning_rate": 8.182249644043664e-07,
"loss": 0.4383,
"step": 9780
},
{
"epoch": 2.76,
"learning_rate": 8.087327954437589e-07,
"loss": 0.3254,
"step": 9790
},
{
"epoch": 2.76,
"learning_rate": 7.992406264831515e-07,
"loss": 0.3985,
"step": 9800
},
{
"epoch": 2.77,
"learning_rate": 7.897484575225439e-07,
"loss": 0.4201,
"step": 9810
},
{
"epoch": 2.77,
"learning_rate": 7.802562885619365e-07,
"loss": 0.15,
"step": 9820
},
{
"epoch": 2.77,
"learning_rate": 7.70764119601329e-07,
"loss": 0.5237,
"step": 9830
},
{
"epoch": 2.77,
"learning_rate": 7.612719506407215e-07,
"loss": 0.3785,
"step": 9840
},
{
"epoch": 2.78,
"learning_rate": 7.51779781680114e-07,
"loss": 0.508,
"step": 9850
},
{
"epoch": 2.78,
"learning_rate": 7.422876127195066e-07,
"loss": 0.3134,
"step": 9860
},
{
"epoch": 2.78,
"learning_rate": 7.327954437588989e-07,
"loss": 0.4777,
"step": 9870
},
{
"epoch": 2.79,
"learning_rate": 7.233032747982914e-07,
"loss": 0.2723,
"step": 9880
},
{
"epoch": 2.79,
"learning_rate": 7.13811105837684e-07,
"loss": 0.2647,
"step": 9890
},
{
"epoch": 2.79,
"learning_rate": 7.043189368770764e-07,
"loss": 0.4874,
"step": 9900
},
{
"epoch": 2.79,
"learning_rate": 6.94826767916469e-07,
"loss": 0.0944,
"step": 9910
},
{
"epoch": 2.8,
"learning_rate": 6.853345989558614e-07,
"loss": 0.2228,
"step": 9920
},
{
"epoch": 2.8,
"learning_rate": 6.75842429995254e-07,
"loss": 0.3151,
"step": 9930
},
{
"epoch": 2.8,
"learning_rate": 6.663502610346465e-07,
"loss": 0.3432,
"step": 9940
},
{
"epoch": 2.81,
"learning_rate": 6.568580920740391e-07,
"loss": 0.4183,
"step": 9950
},
{
"epoch": 2.81,
"learning_rate": 6.473659231134315e-07,
"loss": 0.2398,
"step": 9960
},
{
"epoch": 2.81,
"learning_rate": 6.37873754152824e-07,
"loss": 0.2553,
"step": 9970
},
{
"epoch": 2.81,
"learning_rate": 6.283815851922164e-07,
"loss": 0.2551,
"step": 9980
},
{
"epoch": 2.82,
"learning_rate": 6.18889416231609e-07,
"loss": 0.4634,
"step": 9990
},
{
"epoch": 2.82,
"learning_rate": 6.093972472710015e-07,
"loss": 0.418,
"step": 10000
},
{
"epoch": 2.82,
"eval_loss": 0.34635499119758606,
"eval_runtime": 210.2528,
"eval_samples_per_second": 9.612,
"eval_steps_per_second": 2.407,
"step": 10000
},
{
"epoch": 2.82,
"learning_rate": 5.99905078310394e-07,
"loss": 0.3378,
"step": 10010
},
{
"epoch": 2.82,
"learning_rate": 5.904129093497864e-07,
"loss": 0.3378,
"step": 10020
},
{
"epoch": 2.83,
"learning_rate": 5.809207403891789e-07,
"loss": 0.1693,
"step": 10030
},
{
"epoch": 2.83,
"learning_rate": 5.714285714285715e-07,
"loss": 0.3012,
"step": 10040
},
{
"epoch": 2.83,
"learning_rate": 5.61936402467964e-07,
"loss": 0.2447,
"step": 10050
},
{
"epoch": 2.84,
"learning_rate": 5.524442335073565e-07,
"loss": 0.2869,
"step": 10060
},
{
"epoch": 2.84,
"learning_rate": 5.42952064546749e-07,
"loss": 0.3656,
"step": 10070
},
{
"epoch": 2.84,
"learning_rate": 5.334598955861415e-07,
"loss": 0.1434,
"step": 10080
},
{
"epoch": 2.84,
"learning_rate": 5.239677266255339e-07,
"loss": 0.1777,
"step": 10090
},
{
"epoch": 2.85,
"learning_rate": 5.144755576649265e-07,
"loss": 0.5208,
"step": 10100
},
{
"epoch": 2.85,
"learning_rate": 5.04983388704319e-07,
"loss": 0.5794,
"step": 10110
},
{
"epoch": 2.85,
"learning_rate": 4.954912197437114e-07,
"loss": 0.2979,
"step": 10120
},
{
"epoch": 2.86,
"learning_rate": 4.859990507831039e-07,
"loss": 0.2351,
"step": 10130
},
{
"epoch": 2.86,
"learning_rate": 4.7650688182249645e-07,
"loss": 0.2441,
"step": 10140
},
{
"epoch": 2.86,
"learning_rate": 4.67014712861889e-07,
"loss": 0.3115,
"step": 10150
},
{
"epoch": 2.86,
"learning_rate": 4.575225439012815e-07,
"loss": 0.2808,
"step": 10160
},
{
"epoch": 2.87,
"learning_rate": 4.48030374940674e-07,
"loss": 0.2838,
"step": 10170
},
{
"epoch": 2.87,
"learning_rate": 4.385382059800665e-07,
"loss": 0.2347,
"step": 10180
},
{
"epoch": 2.87,
"learning_rate": 4.29046037019459e-07,
"loss": 0.4912,
"step": 10190
},
{
"epoch": 2.88,
"learning_rate": 4.195538680588515e-07,
"loss": 0.2631,
"step": 10200
},
{
"epoch": 2.88,
"learning_rate": 4.1006169909824394e-07,
"loss": 0.2965,
"step": 10210
},
{
"epoch": 2.88,
"learning_rate": 4.0056953013763643e-07,
"loss": 0.3917,
"step": 10220
},
{
"epoch": 2.88,
"learning_rate": 3.91077361177029e-07,
"loss": 0.5057,
"step": 10230
},
{
"epoch": 2.89,
"learning_rate": 3.8158519221642146e-07,
"loss": 0.4255,
"step": 10240
},
{
"epoch": 2.89,
"learning_rate": 3.7209302325581396e-07,
"loss": 0.2203,
"step": 10250
},
{
"epoch": 2.89,
"learning_rate": 3.626008542952065e-07,
"loss": 0.4773,
"step": 10260
},
{
"epoch": 2.9,
"learning_rate": 3.53108685334599e-07,
"loss": 0.2554,
"step": 10270
},
{
"epoch": 2.9,
"learning_rate": 3.436165163739915e-07,
"loss": 0.389,
"step": 10280
},
{
"epoch": 2.9,
"learning_rate": 3.34124347413384e-07,
"loss": 0.2402,
"step": 10290
},
{
"epoch": 2.9,
"learning_rate": 3.246321784527765e-07,
"loss": 0.447,
"step": 10300
},
{
"epoch": 2.91,
"learning_rate": 3.1514000949216895e-07,
"loss": 0.3556,
"step": 10310
},
{
"epoch": 2.91,
"learning_rate": 3.056478405315615e-07,
"loss": 0.3357,
"step": 10320
},
{
"epoch": 2.91,
"learning_rate": 2.96155671570954e-07,
"loss": 0.5372,
"step": 10330
},
{
"epoch": 2.92,
"learning_rate": 2.866635026103465e-07,
"loss": 0.3488,
"step": 10340
},
{
"epoch": 2.92,
"learning_rate": 2.7717133364973897e-07,
"loss": 0.4506,
"step": 10350
},
{
"epoch": 2.92,
"learning_rate": 2.676791646891315e-07,
"loss": 0.3523,
"step": 10360
},
{
"epoch": 2.92,
"learning_rate": 2.58186995728524e-07,
"loss": 0.3725,
"step": 10370
},
{
"epoch": 2.93,
"learning_rate": 2.486948267679165e-07,
"loss": 0.5271,
"step": 10380
},
{
"epoch": 2.93,
"learning_rate": 2.39202657807309e-07,
"loss": 0.2397,
"step": 10390
},
{
"epoch": 2.93,
"learning_rate": 2.297104888467015e-07,
"loss": 0.1559,
"step": 10400
},
{
"epoch": 2.93,
"learning_rate": 2.20218319886094e-07,
"loss": 0.4188,
"step": 10410
},
{
"epoch": 2.94,
"learning_rate": 2.107261509254865e-07,
"loss": 0.5267,
"step": 10420
},
{
"epoch": 2.94,
"learning_rate": 2.0123398196487897e-07,
"loss": 0.5628,
"step": 10430
},
{
"epoch": 2.94,
"learning_rate": 1.917418130042715e-07,
"loss": 0.4105,
"step": 10440
},
{
"epoch": 2.95,
"learning_rate": 1.82249644043664e-07,
"loss": 0.2813,
"step": 10450
},
{
"epoch": 2.95,
"learning_rate": 1.727574750830565e-07,
"loss": 0.3071,
"step": 10460
},
{
"epoch": 2.95,
"learning_rate": 1.6326530612244901e-07,
"loss": 0.3369,
"step": 10470
},
{
"epoch": 2.95,
"learning_rate": 1.537731371618415e-07,
"loss": 0.1922,
"step": 10480
},
{
"epoch": 2.96,
"learning_rate": 1.44280968201234e-07,
"loss": 0.461,
"step": 10490
},
{
"epoch": 2.96,
"learning_rate": 1.3478879924062649e-07,
"loss": 0.3687,
"step": 10500
},
{
"epoch": 2.96,
"eval_loss": 0.34634825587272644,
"eval_runtime": 209.8305,
"eval_samples_per_second": 9.632,
"eval_steps_per_second": 2.411,
"step": 10500
},
{
"epoch": 2.96,
"learning_rate": 4.115502904658845e-06,
"loss": 0.4669,
"step": 10510
},
{
"epoch": 2.97,
"learning_rate": 4.109807495158902e-06,
"loss": 0.3283,
"step": 10520
},
{
"epoch": 2.97,
"learning_rate": 4.104112085658959e-06,
"loss": 0.2859,
"step": 10530
},
{
"epoch": 2.97,
"learning_rate": 4.098416676159016e-06,
"loss": 0.2623,
"step": 10540
},
{
"epoch": 2.97,
"learning_rate": 4.0927212666590734e-06,
"loss": 0.3267,
"step": 10550
},
{
"epoch": 2.98,
"learning_rate": 4.08702585715913e-06,
"loss": 0.278,
"step": 10560
},
{
"epoch": 2.98,
"learning_rate": 4.081330447659187e-06,
"loss": 0.487,
"step": 10570
},
{
"epoch": 2.98,
"learning_rate": 4.075635038159244e-06,
"loss": 0.3299,
"step": 10580
},
{
"epoch": 2.99,
"learning_rate": 4.069939628659301e-06,
"loss": 0.4466,
"step": 10590
},
{
"epoch": 2.99,
"learning_rate": 4.064244219159358e-06,
"loss": 0.2438,
"step": 10600
},
{
"epoch": 2.99,
"learning_rate": 4.058548809659415e-06,
"loss": 0.6179,
"step": 10610
},
{
"epoch": 2.99,
"learning_rate": 4.052853400159472e-06,
"loss": 0.3405,
"step": 10620
},
{
"epoch": 3.0,
"learning_rate": 4.047157990659529e-06,
"loss": 0.344,
"step": 10630
},
{
"epoch": 3.0,
"learning_rate": 4.041462581159586e-06,
"loss": 0.3586,
"step": 10640
},
{
"epoch": 3.0,
"learning_rate": 4.035767171659642e-06,
"loss": 0.1847,
"step": 10650
},
{
"epoch": 3.01,
"learning_rate": 4.0300717621597e-06,
"loss": 0.3832,
"step": 10660
},
{
"epoch": 3.01,
"learning_rate": 4.024376352659757e-06,
"loss": 0.2131,
"step": 10670
},
{
"epoch": 3.01,
"learning_rate": 4.018680943159813e-06,
"loss": 0.3245,
"step": 10680
},
{
"epoch": 3.01,
"learning_rate": 4.01298553365987e-06,
"loss": 0.5644,
"step": 10690
},
{
"epoch": 3.02,
"learning_rate": 4.007290124159927e-06,
"loss": 0.1613,
"step": 10700
},
{
"epoch": 3.02,
"learning_rate": 4.001594714659984e-06,
"loss": 0.3756,
"step": 10710
},
{
"epoch": 3.02,
"learning_rate": 3.9958993051600414e-06,
"loss": 0.1069,
"step": 10720
},
{
"epoch": 3.03,
"learning_rate": 3.990203895660098e-06,
"loss": 0.4927,
"step": 10730
},
{
"epoch": 3.03,
"learning_rate": 3.9845084861601555e-06,
"loss": 0.386,
"step": 10740
},
{
"epoch": 3.03,
"learning_rate": 3.9788130766602126e-06,
"loss": 0.3843,
"step": 10750
},
{
"epoch": 3.03,
"learning_rate": 3.973117667160269e-06,
"loss": 0.2719,
"step": 10760
},
{
"epoch": 3.04,
"learning_rate": 3.967422257660326e-06,
"loss": 0.4034,
"step": 10770
},
{
"epoch": 3.04,
"learning_rate": 3.961726848160383e-06,
"loss": 0.3937,
"step": 10780
},
{
"epoch": 3.04,
"learning_rate": 3.95603143866044e-06,
"loss": 0.4995,
"step": 10790
},
{
"epoch": 3.04,
"learning_rate": 3.950336029160497e-06,
"loss": 0.2434,
"step": 10800
},
{
"epoch": 3.05,
"learning_rate": 3.944640619660554e-06,
"loss": 0.3839,
"step": 10810
},
{
"epoch": 3.05,
"learning_rate": 3.938945210160611e-06,
"loss": 0.3355,
"step": 10820
},
{
"epoch": 3.05,
"learning_rate": 3.933249800660668e-06,
"loss": 0.2876,
"step": 10830
},
{
"epoch": 3.06,
"learning_rate": 3.927554391160725e-06,
"loss": 0.4364,
"step": 10840
},
{
"epoch": 3.06,
"learning_rate": 3.921858981660781e-06,
"loss": 0.2628,
"step": 10850
},
{
"epoch": 3.06,
"learning_rate": 3.916163572160839e-06,
"loss": 0.3119,
"step": 10860
},
{
"epoch": 3.06,
"learning_rate": 3.910468162660896e-06,
"loss": 0.1733,
"step": 10870
},
{
"epoch": 3.07,
"learning_rate": 3.904772753160952e-06,
"loss": 0.2827,
"step": 10880
},
{
"epoch": 3.07,
"learning_rate": 3.899077343661009e-06,
"loss": 0.1766,
"step": 10890
},
{
"epoch": 3.07,
"learning_rate": 3.8933819341610665e-06,
"loss": 0.2672,
"step": 10900
},
{
"epoch": 3.08,
"learning_rate": 3.8876865246611235e-06,
"loss": 0.3424,
"step": 10910
},
{
"epoch": 3.08,
"learning_rate": 3.8819911151611805e-06,
"loss": 0.4487,
"step": 10920
},
{
"epoch": 3.08,
"learning_rate": 3.876295705661237e-06,
"loss": 0.4172,
"step": 10930
},
{
"epoch": 3.08,
"learning_rate": 3.870600296161295e-06,
"loss": 0.3854,
"step": 10940
},
{
"epoch": 3.09,
"learning_rate": 3.864904886661352e-06,
"loss": 0.3686,
"step": 10950
},
{
"epoch": 3.09,
"learning_rate": 3.859209477161408e-06,
"loss": 0.2533,
"step": 10960
},
{
"epoch": 3.09,
"learning_rate": 3.853514067661465e-06,
"loss": 0.1369,
"step": 10970
},
{
"epoch": 3.1,
"learning_rate": 3.847818658161522e-06,
"loss": 0.4269,
"step": 10980
},
{
"epoch": 3.1,
"learning_rate": 3.842123248661579e-06,
"loss": 0.2595,
"step": 10990
},
{
"epoch": 3.1,
"learning_rate": 3.836427839161636e-06,
"loss": 0.2355,
"step": 11000
},
{
"epoch": 3.1,
"eval_loss": 0.34977805614471436,
"eval_runtime": 212.3154,
"eval_samples_per_second": 9.519,
"eval_steps_per_second": 2.383,
"step": 11000
},
{
"epoch": 3.1,
"learning_rate": 3.830732429661693e-06,
"loss": 0.3466,
"step": 11010
},
{
"epoch": 3.11,
"learning_rate": 3.82503702016175e-06,
"loss": 0.1506,
"step": 11020
},
{
"epoch": 3.11,
"learning_rate": 3.819341610661807e-06,
"loss": 0.2931,
"step": 11030
},
{
"epoch": 3.11,
"learning_rate": 3.813646201161864e-06,
"loss": 0.5066,
"step": 11040
},
{
"epoch": 3.12,
"learning_rate": 3.807950791661921e-06,
"loss": 0.3229,
"step": 11050
},
{
"epoch": 3.12,
"learning_rate": 3.802255382161978e-06,
"loss": 0.2496,
"step": 11060
},
{
"epoch": 3.12,
"learning_rate": 3.7965599726620344e-06,
"loss": 0.4409,
"step": 11070
},
{
"epoch": 3.12,
"learning_rate": 3.7908645631620915e-06,
"loss": 0.3241,
"step": 11080
},
{
"epoch": 3.13,
"learning_rate": 3.785169153662149e-06,
"loss": 0.2519,
"step": 11090
},
{
"epoch": 3.13,
"learning_rate": 3.7794737441622056e-06,
"loss": 0.4325,
"step": 11100
},
{
"epoch": 3.13,
"learning_rate": 3.7737783346622626e-06,
"loss": 0.3483,
"step": 11110
},
{
"epoch": 3.14,
"learning_rate": 3.7680829251623196e-06,
"loss": 0.3515,
"step": 11120
},
{
"epoch": 3.14,
"learning_rate": 3.7623875156623763e-06,
"loss": 0.4152,
"step": 11130
},
{
"epoch": 3.14,
"learning_rate": 3.7566921061624333e-06,
"loss": 0.3937,
"step": 11140
},
{
"epoch": 3.14,
"learning_rate": 3.7509966966624903e-06,
"loss": 0.4958,
"step": 11150
},
{
"epoch": 3.15,
"learning_rate": 3.745301287162547e-06,
"loss": 0.3978,
"step": 11160
},
{
"epoch": 3.15,
"learning_rate": 3.7396058776626044e-06,
"loss": 0.3547,
"step": 11170
},
{
"epoch": 3.15,
"learning_rate": 3.7339104681626615e-06,
"loss": 0.1469,
"step": 11180
},
{
"epoch": 3.15,
"learning_rate": 3.728215058662718e-06,
"loss": 0.4382,
"step": 11190
},
{
"epoch": 3.16,
"learning_rate": 3.722519649162775e-06,
"loss": 0.234,
"step": 11200
},
{
"epoch": 3.16,
"learning_rate": 3.7168242396628317e-06,
"loss": 0.3652,
"step": 11210
},
{
"epoch": 3.16,
"learning_rate": 3.7111288301628888e-06,
"loss": 0.2035,
"step": 11220
},
{
"epoch": 3.17,
"learning_rate": 3.7054334206629462e-06,
"loss": 0.1942,
"step": 11230
},
{
"epoch": 3.17,
"learning_rate": 3.699738011163003e-06,
"loss": 0.6065,
"step": 11240
},
{
"epoch": 3.17,
"learning_rate": 3.69404260166306e-06,
"loss": 0.3618,
"step": 11250
},
{
"epoch": 3.17,
"learning_rate": 3.688347192163117e-06,
"loss": 0.3662,
"step": 11260
},
{
"epoch": 3.18,
"learning_rate": 3.6826517826631736e-06,
"loss": 0.2511,
"step": 11270
},
{
"epoch": 3.18,
"learning_rate": 3.6769563731632306e-06,
"loss": 0.3071,
"step": 11280
},
{
"epoch": 3.18,
"learning_rate": 3.671260963663288e-06,
"loss": 0.4005,
"step": 11290
},
{
"epoch": 3.19,
"learning_rate": 3.6655655541633447e-06,
"loss": 0.3301,
"step": 11300
},
{
"epoch": 3.19,
"learning_rate": 3.6598701446634017e-06,
"loss": 0.3304,
"step": 11310
},
{
"epoch": 3.19,
"learning_rate": 3.6541747351634588e-06,
"loss": 0.4917,
"step": 11320
},
{
"epoch": 3.19,
"learning_rate": 3.6484793256635154e-06,
"loss": 0.4638,
"step": 11330
},
{
"epoch": 3.2,
"learning_rate": 3.6427839161635724e-06,
"loss": 0.3115,
"step": 11340
},
{
"epoch": 3.2,
"learning_rate": 3.63708850666363e-06,
"loss": 0.322,
"step": 11350
},
{
"epoch": 3.2,
"learning_rate": 3.631393097163686e-06,
"loss": 0.365,
"step": 11360
},
{
"epoch": 3.21,
"learning_rate": 3.6256976876637435e-06,
"loss": 0.361,
"step": 11370
},
{
"epoch": 3.21,
"learning_rate": 3.6200022781638e-06,
"loss": 0.3455,
"step": 11380
},
{
"epoch": 3.21,
"learning_rate": 3.614306868663857e-06,
"loss": 0.3239,
"step": 11390
},
{
"epoch": 3.21,
"learning_rate": 3.6086114591639142e-06,
"loss": 0.229,
"step": 11400
},
{
"epoch": 3.22,
"learning_rate": 3.602916049663971e-06,
"loss": 0.3363,
"step": 11410
},
{
"epoch": 3.22,
"learning_rate": 3.597220640164028e-06,
"loss": 0.3821,
"step": 11420
},
{
"epoch": 3.22,
"learning_rate": 3.5915252306640854e-06,
"loss": 0.5456,
"step": 11430
},
{
"epoch": 3.23,
"learning_rate": 3.585829821164142e-06,
"loss": 0.3119,
"step": 11440
},
{
"epoch": 3.23,
"learning_rate": 3.580134411664199e-06,
"loss": 0.1574,
"step": 11450
},
{
"epoch": 3.23,
"learning_rate": 3.574439002164256e-06,
"loss": 0.4289,
"step": 11460
},
{
"epoch": 3.23,
"learning_rate": 3.5687435926643127e-06,
"loss": 0.5156,
"step": 11470
},
{
"epoch": 3.24,
"learning_rate": 3.5630481831643697e-06,
"loss": 0.2907,
"step": 11480
},
{
"epoch": 3.24,
"learning_rate": 3.557352773664427e-06,
"loss": 0.4394,
"step": 11490
},
{
"epoch": 3.24,
"learning_rate": 3.5516573641644838e-06,
"loss": 0.4248,
"step": 11500
},
{
"epoch": 3.24,
"eval_loss": 0.3501090109348297,
"eval_runtime": 212.2293,
"eval_samples_per_second": 9.523,
"eval_steps_per_second": 2.384,
"step": 11500
},
{
"epoch": 3.24,
"learning_rate": 3.545961954664541e-06,
"loss": 0.3417,
"step": 11510
},
{
"epoch": 3.25,
"learning_rate": 3.540266545164598e-06,
"loss": 0.4421,
"step": 11520
},
{
"epoch": 3.25,
"learning_rate": 3.5345711356646545e-06,
"loss": 0.0987,
"step": 11530
},
{
"epoch": 3.25,
"learning_rate": 3.5288757261647115e-06,
"loss": 0.2601,
"step": 11540
},
{
"epoch": 3.26,
"learning_rate": 3.523180316664768e-06,
"loss": 0.5805,
"step": 11550
},
{
"epoch": 3.26,
"learning_rate": 3.517484907164825e-06,
"loss": 0.3845,
"step": 11560
},
{
"epoch": 3.26,
"learning_rate": 3.5117894976648826e-06,
"loss": 0.5719,
"step": 11570
},
{
"epoch": 3.26,
"learning_rate": 3.5060940881649393e-06,
"loss": 0.2547,
"step": 11580
},
{
"epoch": 3.27,
"learning_rate": 3.5003986786649963e-06,
"loss": 0.4258,
"step": 11590
},
{
"epoch": 3.27,
"learning_rate": 3.4947032691650533e-06,
"loss": 0.3509,
"step": 11600
},
{
"epoch": 3.27,
"learning_rate": 3.48900785966511e-06,
"loss": 0.4785,
"step": 11610
},
{
"epoch": 3.28,
"learning_rate": 3.483312450165167e-06,
"loss": 0.3351,
"step": 11620
},
{
"epoch": 3.28,
"learning_rate": 3.4776170406652245e-06,
"loss": 0.319,
"step": 11630
},
{
"epoch": 3.28,
"learning_rate": 3.471921631165281e-06,
"loss": 0.5243,
"step": 11640
},
{
"epoch": 3.28,
"learning_rate": 3.466226221665338e-06,
"loss": 0.3207,
"step": 11650
},
{
"epoch": 3.29,
"learning_rate": 3.460530812165395e-06,
"loss": 0.3911,
"step": 11660
},
{
"epoch": 3.29,
"learning_rate": 3.4548354026654518e-06,
"loss": 0.4672,
"step": 11670
},
{
"epoch": 3.29,
"learning_rate": 3.449139993165509e-06,
"loss": 0.3416,
"step": 11680
},
{
"epoch": 3.3,
"learning_rate": 3.4434445836655663e-06,
"loss": 0.1996,
"step": 11690
},
{
"epoch": 3.3,
"learning_rate": 3.437749174165623e-06,
"loss": 0.2258,
"step": 11700
},
{
"epoch": 3.3,
"learning_rate": 3.43205376466568e-06,
"loss": 0.6087,
"step": 11710
},
{
"epoch": 3.3,
"learning_rate": 3.4263583551657366e-06,
"loss": 0.5061,
"step": 11720
},
{
"epoch": 3.31,
"learning_rate": 3.4206629456657936e-06,
"loss": 0.318,
"step": 11730
},
{
"epoch": 3.31,
"learning_rate": 3.4149675361658506e-06,
"loss": 0.2787,
"step": 11740
},
{
"epoch": 3.31,
"learning_rate": 3.4092721266659073e-06,
"loss": 0.2786,
"step": 11750
},
{
"epoch": 3.32,
"learning_rate": 3.4035767171659643e-06,
"loss": 0.3459,
"step": 11760
},
{
"epoch": 3.32,
"learning_rate": 3.3978813076660218e-06,
"loss": 0.3916,
"step": 11770
},
{
"epoch": 3.32,
"learning_rate": 3.3921858981660784e-06,
"loss": 0.485,
"step": 11780
},
{
"epoch": 3.32,
"learning_rate": 3.3864904886661354e-06,
"loss": 0.1479,
"step": 11790
},
{
"epoch": 3.33,
"learning_rate": 3.3807950791661925e-06,
"loss": 0.2049,
"step": 11800
},
{
"epoch": 3.33,
"learning_rate": 3.375099669666249e-06,
"loss": 0.2876,
"step": 11810
},
{
"epoch": 3.33,
"learning_rate": 3.369404260166306e-06,
"loss": 0.4562,
"step": 11820
},
{
"epoch": 3.34,
"learning_rate": 3.3637088506663636e-06,
"loss": 0.3319,
"step": 11830
},
{
"epoch": 3.34,
"learning_rate": 3.35801344116642e-06,
"loss": 0.442,
"step": 11840
},
{
"epoch": 3.34,
"learning_rate": 3.3523180316664772e-06,
"loss": 0.3675,
"step": 11850
},
{
"epoch": 3.34,
"learning_rate": 3.346622622166534e-06,
"loss": 0.3927,
"step": 11860
},
{
"epoch": 3.35,
"learning_rate": 3.340927212666591e-06,
"loss": 0.6269,
"step": 11870
},
{
"epoch": 3.35,
"learning_rate": 3.335231803166648e-06,
"loss": 0.1756,
"step": 11880
},
{
"epoch": 3.35,
"learning_rate": 3.3295363936667045e-06,
"loss": 0.4247,
"step": 11890
},
{
"epoch": 3.35,
"learning_rate": 3.323840984166762e-06,
"loss": 0.3975,
"step": 11900
},
{
"epoch": 3.36,
"learning_rate": 3.318145574666819e-06,
"loss": 0.4755,
"step": 11910
},
{
"epoch": 3.36,
"learning_rate": 3.3124501651668757e-06,
"loss": 0.379,
"step": 11920
},
{
"epoch": 3.36,
"learning_rate": 3.3067547556669327e-06,
"loss": 0.3689,
"step": 11930
},
{
"epoch": 3.37,
"learning_rate": 3.3010593461669897e-06,
"loss": 0.2979,
"step": 11940
},
{
"epoch": 3.37,
"learning_rate": 3.2953639366670464e-06,
"loss": 0.455,
"step": 11950
},
{
"epoch": 3.37,
"learning_rate": 3.2896685271671034e-06,
"loss": 0.4162,
"step": 11960
},
{
"epoch": 3.37,
"learning_rate": 3.283973117667161e-06,
"loss": 0.4755,
"step": 11970
},
{
"epoch": 3.38,
"learning_rate": 3.2782777081672175e-06,
"loss": 0.3042,
"step": 11980
},
{
"epoch": 3.38,
"learning_rate": 3.2725822986672745e-06,
"loss": 0.286,
"step": 11990
},
{
"epoch": 3.38,
"learning_rate": 3.2668868891673316e-06,
"loss": 0.4205,
"step": 12000
},
{
"epoch": 3.38,
"eval_loss": 0.3515862822532654,
"eval_runtime": 210.7037,
"eval_samples_per_second": 9.592,
"eval_steps_per_second": 2.401,
"step": 12000
},
{
"epoch": 3.39,
"learning_rate": 3.261191479667388e-06,
"loss": 0.4638,
"step": 12010
},
{
"epoch": 3.39,
"learning_rate": 3.2554960701674452e-06,
"loss": 0.4955,
"step": 12020
},
{
"epoch": 3.39,
"learning_rate": 3.249800660667502e-06,
"loss": 0.3171,
"step": 12030
},
{
"epoch": 3.39,
"learning_rate": 3.2441052511675593e-06,
"loss": 0.3157,
"step": 12040
},
{
"epoch": 3.4,
"learning_rate": 3.2384098416676163e-06,
"loss": 0.4029,
"step": 12050
},
{
"epoch": 3.4,
"learning_rate": 3.232714432167673e-06,
"loss": 0.6141,
"step": 12060
},
{
"epoch": 3.4,
"learning_rate": 3.22701902266773e-06,
"loss": 0.5261,
"step": 12070
},
{
"epoch": 3.41,
"learning_rate": 3.221323613167787e-06,
"loss": 0.4287,
"step": 12080
},
{
"epoch": 3.41,
"learning_rate": 3.2156282036678437e-06,
"loss": 0.3101,
"step": 12090
},
{
"epoch": 3.41,
"learning_rate": 3.209932794167901e-06,
"loss": 0.4954,
"step": 12100
},
{
"epoch": 3.41,
"learning_rate": 3.204237384667958e-06,
"loss": 0.4279,
"step": 12110
},
{
"epoch": 3.42,
"learning_rate": 3.1985419751680148e-06,
"loss": 0.1931,
"step": 12120
},
{
"epoch": 3.42,
"learning_rate": 3.192846565668072e-06,
"loss": 0.4763,
"step": 12130
},
{
"epoch": 3.42,
"learning_rate": 3.187151156168129e-06,
"loss": 0.1875,
"step": 12140
},
{
"epoch": 3.43,
"learning_rate": 3.1814557466681855e-06,
"loss": 0.1963,
"step": 12150
},
{
"epoch": 3.43,
"learning_rate": 3.175760337168243e-06,
"loss": 0.3605,
"step": 12160
},
{
"epoch": 3.43,
"learning_rate": 3.1700649276683e-06,
"loss": 0.2117,
"step": 12170
},
{
"epoch": 3.43,
"learning_rate": 3.1643695181683566e-06,
"loss": 0.4029,
"step": 12180
},
{
"epoch": 3.44,
"learning_rate": 3.1586741086684136e-06,
"loss": 0.3201,
"step": 12190
},
{
"epoch": 3.44,
"learning_rate": 3.1529786991684702e-06,
"loss": 0.4943,
"step": 12200
},
{
"epoch": 3.44,
"learning_rate": 3.1472832896685273e-06,
"loss": 0.459,
"step": 12210
},
{
"epoch": 3.45,
"learning_rate": 3.1415878801685843e-06,
"loss": 0.1145,
"step": 12220
},
{
"epoch": 3.45,
"learning_rate": 3.135892470668641e-06,
"loss": 0.3262,
"step": 12230
},
{
"epoch": 3.45,
"learning_rate": 3.1301970611686984e-06,
"loss": 0.3552,
"step": 12240
},
{
"epoch": 3.45,
"learning_rate": 3.1245016516687554e-06,
"loss": 0.3953,
"step": 12250
},
{
"epoch": 3.46,
"learning_rate": 3.118806242168812e-06,
"loss": 0.2188,
"step": 12260
},
{
"epoch": 3.46,
"learning_rate": 3.113110832668869e-06,
"loss": 0.2279,
"step": 12270
},
{
"epoch": 3.46,
"learning_rate": 3.107415423168926e-06,
"loss": 0.398,
"step": 12280
},
{
"epoch": 3.46,
"learning_rate": 3.1017200136689828e-06,
"loss": 0.4347,
"step": 12290
},
{
"epoch": 3.47,
"learning_rate": 3.0960246041690402e-06,
"loss": 0.232,
"step": 12300
},
{
"epoch": 3.47,
"learning_rate": 3.0903291946690973e-06,
"loss": 0.2781,
"step": 12310
},
{
"epoch": 3.47,
"learning_rate": 3.084633785169154e-06,
"loss": 0.3938,
"step": 12320
},
{
"epoch": 3.48,
"learning_rate": 3.078938375669211e-06,
"loss": 0.5736,
"step": 12330
},
{
"epoch": 3.48,
"learning_rate": 3.073242966169268e-06,
"loss": 0.327,
"step": 12340
},
{
"epoch": 3.48,
"learning_rate": 3.0675475566693246e-06,
"loss": 0.2571,
"step": 12350
},
{
"epoch": 3.48,
"learning_rate": 3.061852147169382e-06,
"loss": 0.3868,
"step": 12360
},
{
"epoch": 3.49,
"learning_rate": 3.0561567376694382e-06,
"loss": 0.3174,
"step": 12370
},
{
"epoch": 3.49,
"learning_rate": 3.0504613281694957e-06,
"loss": 0.2424,
"step": 12380
},
{
"epoch": 3.49,
"learning_rate": 3.0447659186695527e-06,
"loss": 0.2545,
"step": 12390
},
{
"epoch": 3.5,
"learning_rate": 3.0390705091696094e-06,
"loss": 0.4465,
"step": 12400
},
{
"epoch": 3.5,
"learning_rate": 3.0333750996696664e-06,
"loss": 0.304,
"step": 12410
},
{
"epoch": 3.5,
"learning_rate": 3.0276796901697234e-06,
"loss": 0.3063,
"step": 12420
},
{
"epoch": 3.5,
"learning_rate": 3.02198428066978e-06,
"loss": 0.3685,
"step": 12430
},
{
"epoch": 3.51,
"learning_rate": 3.0162888711698375e-06,
"loss": 0.4217,
"step": 12440
},
{
"epoch": 3.51,
"learning_rate": 3.0105934616698946e-06,
"loss": 0.3801,
"step": 12450
},
{
"epoch": 3.51,
"learning_rate": 3.004898052169951e-06,
"loss": 0.2402,
"step": 12460
},
{
"epoch": 3.52,
"learning_rate": 2.9992026426700082e-06,
"loss": 0.3375,
"step": 12470
},
{
"epoch": 3.52,
"learning_rate": 2.9935072331700653e-06,
"loss": 0.4198,
"step": 12480
},
{
"epoch": 3.52,
"learning_rate": 2.987811823670122e-06,
"loss": 0.4481,
"step": 12490
},
{
"epoch": 3.52,
"learning_rate": 2.9821164141701793e-06,
"loss": 0.1827,
"step": 12500
},
{
"epoch": 3.52,
"eval_loss": 0.3478640913963318,
"eval_runtime": 211.175,
"eval_samples_per_second": 9.57,
"eval_steps_per_second": 2.396,
"step": 12500
},
{
"epoch": 3.53,
"learning_rate": 2.976421004670236e-06,
"loss": 0.2672,
"step": 12510
},
{
"epoch": 3.53,
"learning_rate": 2.970725595170293e-06,
"loss": 0.3767,
"step": 12520
},
{
"epoch": 3.53,
"learning_rate": 2.96503018567035e-06,
"loss": 0.356,
"step": 12530
},
{
"epoch": 3.54,
"learning_rate": 2.9593347761704066e-06,
"loss": 0.2137,
"step": 12540
},
{
"epoch": 3.54,
"learning_rate": 2.9536393666704637e-06,
"loss": 0.156,
"step": 12550
},
{
"epoch": 3.54,
"learning_rate": 2.947943957170521e-06,
"loss": 0.2804,
"step": 12560
},
{
"epoch": 3.54,
"learning_rate": 2.9422485476705773e-06,
"loss": 0.346,
"step": 12570
},
{
"epoch": 3.55,
"learning_rate": 2.936553138170635e-06,
"loss": 0.3009,
"step": 12580
},
{
"epoch": 3.55,
"learning_rate": 2.930857728670692e-06,
"loss": 0.3055,
"step": 12590
},
{
"epoch": 3.55,
"learning_rate": 2.9251623191707485e-06,
"loss": 0.4737,
"step": 12600
},
{
"epoch": 3.56,
"learning_rate": 2.9194669096708055e-06,
"loss": 0.214,
"step": 12610
},
{
"epoch": 3.56,
"learning_rate": 2.9137715001708625e-06,
"loss": 0.3206,
"step": 12620
},
{
"epoch": 3.56,
"learning_rate": 2.908076090670919e-06,
"loss": 0.5329,
"step": 12630
},
{
"epoch": 3.56,
"learning_rate": 2.9023806811709766e-06,
"loss": 0.1631,
"step": 12640
},
{
"epoch": 3.57,
"learning_rate": 2.8966852716710337e-06,
"loss": 0.3896,
"step": 12650
},
{
"epoch": 3.57,
"learning_rate": 2.8909898621710903e-06,
"loss": 0.7929,
"step": 12660
},
{
"epoch": 3.57,
"learning_rate": 2.8852944526711473e-06,
"loss": 0.3773,
"step": 12670
},
{
"epoch": 3.57,
"learning_rate": 2.879599043171204e-06,
"loss": 0.2049,
"step": 12680
},
{
"epoch": 3.58,
"learning_rate": 2.873903633671261e-06,
"loss": 0.3991,
"step": 12690
},
{
"epoch": 3.58,
"learning_rate": 2.8682082241713184e-06,
"loss": 0.3699,
"step": 12700
},
{
"epoch": 3.58,
"learning_rate": 2.862512814671375e-06,
"loss": 0.5049,
"step": 12710
},
{
"epoch": 3.59,
"learning_rate": 2.856817405171432e-06,
"loss": 0.727,
"step": 12720
},
{
"epoch": 3.59,
"learning_rate": 2.851121995671489e-06,
"loss": 0.4813,
"step": 12730
},
{
"epoch": 3.59,
"learning_rate": 2.8454265861715458e-06,
"loss": 0.2857,
"step": 12740
},
{
"epoch": 3.59,
"learning_rate": 2.839731176671603e-06,
"loss": 0.3309,
"step": 12750
},
{
"epoch": 3.6,
"learning_rate": 2.8340357671716603e-06,
"loss": 0.5852,
"step": 12760
},
{
"epoch": 3.6,
"learning_rate": 2.8283403576717165e-06,
"loss": 0.5285,
"step": 12770
},
{
"epoch": 3.6,
"learning_rate": 2.822644948171774e-06,
"loss": 0.3123,
"step": 12780
},
{
"epoch": 3.61,
"learning_rate": 2.816949538671831e-06,
"loss": 0.3655,
"step": 12790
},
{
"epoch": 3.61,
"learning_rate": 2.8112541291718876e-06,
"loss": 0.227,
"step": 12800
},
{
"epoch": 3.61,
"learning_rate": 2.8055587196719446e-06,
"loss": 0.188,
"step": 12810
},
{
"epoch": 3.61,
"learning_rate": 2.7998633101720017e-06,
"loss": 0.4075,
"step": 12820
},
{
"epoch": 3.62,
"learning_rate": 2.7941679006720583e-06,
"loss": 0.3146,
"step": 12830
},
{
"epoch": 3.62,
"learning_rate": 2.7884724911721157e-06,
"loss": 0.2311,
"step": 12840
},
{
"epoch": 3.62,
"learning_rate": 2.7827770816721724e-06,
"loss": 0.3325,
"step": 12850
},
{
"epoch": 3.63,
"learning_rate": 2.7770816721722294e-06,
"loss": 0.179,
"step": 12860
},
{
"epoch": 3.63,
"learning_rate": 2.7713862626722864e-06,
"loss": 0.3413,
"step": 12870
},
{
"epoch": 3.63,
"learning_rate": 2.765690853172343e-06,
"loss": 0.4583,
"step": 12880
},
{
"epoch": 3.63,
"learning_rate": 2.7599954436724e-06,
"loss": 0.4093,
"step": 12890
},
{
"epoch": 3.64,
"learning_rate": 2.7543000341724576e-06,
"loss": 0.321,
"step": 12900
},
{
"epoch": 3.64,
"learning_rate": 2.748604624672514e-06,
"loss": 0.2316,
"step": 12910
},
{
"epoch": 3.64,
"learning_rate": 2.742909215172571e-06,
"loss": 0.2259,
"step": 12920
},
{
"epoch": 3.65,
"learning_rate": 2.7372138056726283e-06,
"loss": 0.4776,
"step": 12930
},
{
"epoch": 3.65,
"learning_rate": 2.731518396172685e-06,
"loss": 0.1712,
"step": 12940
},
{
"epoch": 3.65,
"learning_rate": 2.725822986672742e-06,
"loss": 0.3603,
"step": 12950
},
{
"epoch": 3.65,
"learning_rate": 2.7201275771727994e-06,
"loss": 0.4242,
"step": 12960
},
{
"epoch": 3.66,
"learning_rate": 2.714432167672856e-06,
"loss": 0.3223,
"step": 12970
},
{
"epoch": 3.66,
"learning_rate": 2.708736758172913e-06,
"loss": 0.3738,
"step": 12980
},
{
"epoch": 3.66,
"learning_rate": 2.70304134867297e-06,
"loss": 0.1973,
"step": 12990
},
{
"epoch": 3.67,
"learning_rate": 2.6973459391730267e-06,
"loss": 0.3688,
"step": 13000
},
{
"epoch": 3.67,
"eval_loss": 0.34860894083976746,
"eval_runtime": 211.4299,
"eval_samples_per_second": 9.559,
"eval_steps_per_second": 2.393,
"step": 13000
},
{
"epoch": 3.67,
"learning_rate": 2.6916505296730837e-06,
"loss": 0.5827,
"step": 13010
},
{
"epoch": 3.67,
"learning_rate": 2.6859551201731403e-06,
"loss": 0.4093,
"step": 13020
},
{
"epoch": 3.67,
"learning_rate": 2.6802597106731974e-06,
"loss": 0.2026,
"step": 13030
},
{
"epoch": 3.68,
"learning_rate": 2.674564301173255e-06,
"loss": 0.3811,
"step": 13040
},
{
"epoch": 3.68,
"learning_rate": 2.6688688916733115e-06,
"loss": 0.4959,
"step": 13050
},
{
"epoch": 3.68,
"learning_rate": 2.6631734821733685e-06,
"loss": 0.2947,
"step": 13060
},
{
"epoch": 3.68,
"learning_rate": 2.6574780726734255e-06,
"loss": 0.3327,
"step": 13070
},
{
"epoch": 3.69,
"learning_rate": 2.651782663173482e-06,
"loss": 0.4354,
"step": 13080
},
{
"epoch": 3.69,
"learning_rate": 2.646087253673539e-06,
"loss": 0.4678,
"step": 13090
},
{
"epoch": 3.69,
"learning_rate": 2.6403918441735967e-06,
"loss": 0.2031,
"step": 13100
},
{
"epoch": 3.7,
"learning_rate": 2.6346964346736533e-06,
"loss": 0.4424,
"step": 13110
},
{
"epoch": 3.7,
"learning_rate": 2.6290010251737103e-06,
"loss": 0.3075,
"step": 13120
},
{
"epoch": 3.7,
"learning_rate": 2.6233056156737674e-06,
"loss": 0.2436,
"step": 13130
},
{
"epoch": 3.7,
"learning_rate": 2.617610206173824e-06,
"loss": 0.2671,
"step": 13140
},
{
"epoch": 3.71,
"learning_rate": 2.611914796673881e-06,
"loss": 0.2847,
"step": 13150
},
{
"epoch": 3.71,
"learning_rate": 2.6062193871739376e-06,
"loss": 0.519,
"step": 13160
},
{
"epoch": 3.71,
"learning_rate": 2.600523977673995e-06,
"loss": 0.3778,
"step": 13170
},
{
"epoch": 3.72,
"learning_rate": 2.594828568174052e-06,
"loss": 0.3399,
"step": 13180
},
{
"epoch": 3.72,
"learning_rate": 2.5891331586741088e-06,
"loss": 0.296,
"step": 13190
},
{
"epoch": 3.72,
"learning_rate": 2.583437749174166e-06,
"loss": 0.4316,
"step": 13200
},
{
"epoch": 3.72,
"learning_rate": 2.577742339674223e-06,
"loss": 0.3023,
"step": 13210
},
{
"epoch": 3.73,
"learning_rate": 2.5720469301742795e-06,
"loss": 0.2552,
"step": 13220
},
{
"epoch": 3.73,
"learning_rate": 2.5663515206743365e-06,
"loss": 0.3943,
"step": 13230
},
{
"epoch": 3.73,
"learning_rate": 2.560656111174394e-06,
"loss": 0.4276,
"step": 13240
},
{
"epoch": 3.74,
"learning_rate": 2.5549607016744506e-06,
"loss": 0.4096,
"step": 13250
},
{
"epoch": 3.74,
"learning_rate": 2.5492652921745076e-06,
"loss": 0.3047,
"step": 13260
},
{
"epoch": 3.74,
"learning_rate": 2.5435698826745647e-06,
"loss": 0.4829,
"step": 13270
},
{
"epoch": 3.74,
"learning_rate": 2.5378744731746213e-06,
"loss": 0.3925,
"step": 13280
},
{
"epoch": 3.75,
"learning_rate": 2.5321790636746783e-06,
"loss": 0.2451,
"step": 13290
},
{
"epoch": 3.75,
"learning_rate": 2.5264836541747358e-06,
"loss": 0.3625,
"step": 13300
},
{
"epoch": 3.75,
"learning_rate": 2.5207882446747924e-06,
"loss": 0.26,
"step": 13310
},
{
"epoch": 3.76,
"learning_rate": 2.5150928351748494e-06,
"loss": 0.4886,
"step": 13320
},
{
"epoch": 3.76,
"learning_rate": 2.509397425674906e-06,
"loss": 0.3842,
"step": 13330
},
{
"epoch": 3.76,
"learning_rate": 2.503702016174963e-06,
"loss": 0.5277,
"step": 13340
},
{
"epoch": 3.76,
"learning_rate": 2.49800660667502e-06,
"loss": 0.537,
"step": 13350
},
{
"epoch": 3.77,
"learning_rate": 2.492311197175077e-06,
"loss": 0.1046,
"step": 13360
},
{
"epoch": 3.77,
"learning_rate": 2.486615787675134e-06,
"loss": 0.3134,
"step": 13370
},
{
"epoch": 3.77,
"learning_rate": 2.480920378175191e-06,
"loss": 0.4783,
"step": 13380
},
{
"epoch": 3.78,
"learning_rate": 2.475224968675248e-06,
"loss": 0.5872,
"step": 13390
},
{
"epoch": 3.78,
"learning_rate": 2.469529559175305e-06,
"loss": 0.3972,
"step": 13400
},
{
"epoch": 3.78,
"learning_rate": 2.463834149675362e-06,
"loss": 0.2255,
"step": 13410
},
{
"epoch": 3.78,
"learning_rate": 2.458138740175419e-06,
"loss": 0.3646,
"step": 13420
},
{
"epoch": 3.79,
"learning_rate": 2.4524433306754756e-06,
"loss": 0.1656,
"step": 13430
},
{
"epoch": 3.79,
"learning_rate": 2.4467479211755326e-06,
"loss": 0.5685,
"step": 13440
},
{
"epoch": 3.79,
"learning_rate": 2.4410525116755897e-06,
"loss": 0.3274,
"step": 13450
},
{
"epoch": 3.79,
"learning_rate": 2.4353571021756467e-06,
"loss": 0.3213,
"step": 13460
},
{
"epoch": 3.8,
"learning_rate": 2.4296616926757038e-06,
"loss": 0.2743,
"step": 13470
},
{
"epoch": 3.8,
"learning_rate": 2.4239662831757604e-06,
"loss": 0.3187,
"step": 13480
},
{
"epoch": 3.8,
"learning_rate": 2.4182708736758174e-06,
"loss": 0.2557,
"step": 13490
},
{
"epoch": 3.81,
"learning_rate": 2.4125754641758745e-06,
"loss": 0.1861,
"step": 13500
},
{
"epoch": 3.81,
"eval_loss": 0.3507066071033478,
"eval_runtime": 212.1488,
"eval_samples_per_second": 9.526,
"eval_steps_per_second": 2.385,
"step": 13500
},
{
"epoch": 3.81,
"learning_rate": 2.4068800546759315e-06,
"loss": 0.3103,
"step": 13510
},
{
"epoch": 3.81,
"learning_rate": 2.4011846451759885e-06,
"loss": 0.4539,
"step": 13520
},
{
"epoch": 3.81,
"learning_rate": 2.395489235676045e-06,
"loss": 0.5474,
"step": 13530
},
{
"epoch": 3.82,
"learning_rate": 2.389793826176102e-06,
"loss": 0.3454,
"step": 13540
},
{
"epoch": 3.82,
"learning_rate": 2.3840984166761592e-06,
"loss": 0.478,
"step": 13550
},
{
"epoch": 3.82,
"learning_rate": 2.3784030071762163e-06,
"loss": 0.3736,
"step": 13560
},
{
"epoch": 3.83,
"learning_rate": 2.3727075976762733e-06,
"loss": 0.3233,
"step": 13570
},
{
"epoch": 3.83,
"learning_rate": 2.36701218817633e-06,
"loss": 0.5434,
"step": 13580
},
{
"epoch": 3.83,
"learning_rate": 2.361316778676387e-06,
"loss": 0.2229,
"step": 13590
},
{
"epoch": 3.83,
"learning_rate": 2.355621369176444e-06,
"loss": 0.2059,
"step": 13600
},
{
"epoch": 3.84,
"learning_rate": 2.349925959676501e-06,
"loss": 0.254,
"step": 13610
},
{
"epoch": 3.84,
"learning_rate": 2.3442305501765577e-06,
"loss": 0.3806,
"step": 13620
},
{
"epoch": 3.84,
"learning_rate": 2.3385351406766147e-06,
"loss": 0.3837,
"step": 13630
},
{
"epoch": 3.85,
"learning_rate": 2.3328397311766718e-06,
"loss": 0.439,
"step": 13640
},
{
"epoch": 3.85,
"learning_rate": 2.327144321676729e-06,
"loss": 0.3859,
"step": 13650
},
{
"epoch": 3.85,
"learning_rate": 2.321448912176786e-06,
"loss": 0.5599,
"step": 13660
},
{
"epoch": 3.85,
"learning_rate": 2.315753502676843e-06,
"loss": 0.3513,
"step": 13670
},
{
"epoch": 3.86,
"learning_rate": 2.3100580931768995e-06,
"loss": 0.3253,
"step": 13680
},
{
"epoch": 3.86,
"learning_rate": 2.3043626836769565e-06,
"loss": 0.2308,
"step": 13690
},
{
"epoch": 3.86,
"learning_rate": 2.2986672741770136e-06,
"loss": 0.2583,
"step": 13700
},
{
"epoch": 3.87,
"learning_rate": 2.2929718646770706e-06,
"loss": 0.4509,
"step": 13710
},
{
"epoch": 3.87,
"learning_rate": 2.2872764551771272e-06,
"loss": 0.3235,
"step": 13720
},
{
"epoch": 3.87,
"learning_rate": 2.2815810456771843e-06,
"loss": 0.3594,
"step": 13730
},
{
"epoch": 3.87,
"learning_rate": 2.2758856361772413e-06,
"loss": 0.5218,
"step": 13740
},
{
"epoch": 3.88,
"learning_rate": 2.2701902266772983e-06,
"loss": 0.2541,
"step": 13750
},
{
"epoch": 3.88,
"learning_rate": 2.2644948171773554e-06,
"loss": 0.3263,
"step": 13760
},
{
"epoch": 3.88,
"learning_rate": 2.2587994076774124e-06,
"loss": 0.4093,
"step": 13770
},
{
"epoch": 3.88,
"learning_rate": 2.253103998177469e-06,
"loss": 0.3335,
"step": 13780
},
{
"epoch": 3.89,
"learning_rate": 2.247408588677526e-06,
"loss": 0.3746,
"step": 13790
},
{
"epoch": 3.89,
"learning_rate": 2.241713179177583e-06,
"loss": 0.4138,
"step": 13800
},
{
"epoch": 3.89,
"learning_rate": 2.23601776967764e-06,
"loss": 0.4556,
"step": 13810
},
{
"epoch": 3.9,
"learning_rate": 2.2303223601776968e-06,
"loss": 0.3204,
"step": 13820
},
{
"epoch": 3.9,
"learning_rate": 2.224626950677754e-06,
"loss": 0.2649,
"step": 13830
},
{
"epoch": 3.9,
"learning_rate": 2.218931541177811e-06,
"loss": 0.2692,
"step": 13840
},
{
"epoch": 3.9,
"learning_rate": 2.213236131677868e-06,
"loss": 0.3215,
"step": 13850
},
{
"epoch": 3.91,
"learning_rate": 2.2075407221779245e-06,
"loss": 0.4207,
"step": 13860
},
{
"epoch": 3.91,
"learning_rate": 2.201845312677982e-06,
"loss": 0.4033,
"step": 13870
},
{
"epoch": 3.91,
"learning_rate": 2.1961499031780386e-06,
"loss": 0.2342,
"step": 13880
},
{
"epoch": 3.92,
"learning_rate": 2.1904544936780956e-06,
"loss": 0.2611,
"step": 13890
},
{
"epoch": 3.92,
"learning_rate": 2.1847590841781527e-06,
"loss": 0.3371,
"step": 13900
},
{
"epoch": 3.92,
"learning_rate": 2.1790636746782097e-06,
"loss": 0.4576,
"step": 13910
},
{
"epoch": 3.92,
"learning_rate": 2.1733682651782663e-06,
"loss": 0.4421,
"step": 13920
},
{
"epoch": 3.93,
"learning_rate": 2.1676728556783234e-06,
"loss": 0.5129,
"step": 13930
},
{
"epoch": 3.93,
"learning_rate": 2.1619774461783804e-06,
"loss": 0.2611,
"step": 13940
},
{
"epoch": 3.93,
"learning_rate": 2.1562820366784375e-06,
"loss": 0.4886,
"step": 13950
},
{
"epoch": 3.94,
"learning_rate": 2.150586627178494e-06,
"loss": 0.1713,
"step": 13960
},
{
"epoch": 3.94,
"learning_rate": 2.1448912176785515e-06,
"loss": 0.1568,
"step": 13970
},
{
"epoch": 3.94,
"learning_rate": 2.139195808178608e-06,
"loss": 0.4705,
"step": 13980
},
{
"epoch": 3.94,
"learning_rate": 2.133500398678665e-06,
"loss": 0.2815,
"step": 13990
},
{
"epoch": 3.95,
"learning_rate": 2.1278049891787222e-06,
"loss": 0.2475,
"step": 14000
},
{
"epoch": 3.95,
"eval_loss": 0.34873369336128235,
"eval_runtime": 212.3511,
"eval_samples_per_second": 9.517,
"eval_steps_per_second": 2.383,
"step": 14000
},
{
"epoch": 3.95,
"learning_rate": 2.1221095796787793e-06,
"loss": 0.4751,
"step": 14010
},
{
"epoch": 3.95,
"learning_rate": 2.116414170178836e-06,
"loss": 0.4,
"step": 14020
},
{
"epoch": 3.96,
"learning_rate": 2.110718760678893e-06,
"loss": 0.4484,
"step": 14030
},
{
"epoch": 3.96,
"learning_rate": 2.10502335117895e-06,
"loss": 0.2746,
"step": 14040
},
{
"epoch": 3.96,
"learning_rate": 2.099327941679007e-06,
"loss": 0.4018,
"step": 14050
},
{
"epoch": 3.96,
"learning_rate": 2.0936325321790636e-06,
"loss": 0.2564,
"step": 14060
},
{
"epoch": 3.97,
"learning_rate": 2.087937122679121e-06,
"loss": 0.5503,
"step": 14070
},
{
"epoch": 3.97,
"learning_rate": 2.0822417131791777e-06,
"loss": 0.147,
"step": 14080
},
{
"epoch": 3.97,
"learning_rate": 2.0765463036792347e-06,
"loss": 0.1796,
"step": 14090
},
{
"epoch": 3.98,
"learning_rate": 2.0708508941792914e-06,
"loss": 0.5249,
"step": 14100
},
{
"epoch": 3.98,
"learning_rate": 2.065155484679349e-06,
"loss": 0.4368,
"step": 14110
},
{
"epoch": 3.98,
"learning_rate": 2.0594600751794054e-06,
"loss": 0.325,
"step": 14120
},
{
"epoch": 3.98,
"learning_rate": 2.0537646656794625e-06,
"loss": 0.2107,
"step": 14130
},
{
"epoch": 3.99,
"learning_rate": 2.0480692561795195e-06,
"loss": 0.5274,
"step": 14140
},
{
"epoch": 3.99,
"learning_rate": 2.0423738466795766e-06,
"loss": 0.3492,
"step": 14150
},
{
"epoch": 3.99,
"learning_rate": 2.036678437179633e-06,
"loss": 0.201,
"step": 14160
},
{
"epoch": 3.99,
"learning_rate": 2.0309830276796906e-06,
"loss": 0.5432,
"step": 14170
},
{
"epoch": 4.0,
"learning_rate": 2.0252876181797473e-06,
"loss": 0.2668,
"step": 14180
},
{
"epoch": 4.0,
"learning_rate": 2.0195922086798043e-06,
"loss": 0.3592,
"step": 14190
},
{
"epoch": 4.0,
"learning_rate": 2.013896799179861e-06,
"loss": 0.2449,
"step": 14200
},
{
"epoch": 4.01,
"learning_rate": 2.0082013896799184e-06,
"loss": 0.2965,
"step": 14210
},
{
"epoch": 4.01,
"learning_rate": 2.002505980179975e-06,
"loss": 0.5565,
"step": 14220
},
{
"epoch": 4.01,
"learning_rate": 1.996810570680032e-06,
"loss": 0.3033,
"step": 14230
},
{
"epoch": 4.01,
"learning_rate": 1.991115161180089e-06,
"loss": 0.3251,
"step": 14240
},
{
"epoch": 4.02,
"learning_rate": 1.985419751680146e-06,
"loss": 0.3458,
"step": 14250
},
{
"epoch": 4.02,
"learning_rate": 1.9797243421802027e-06,
"loss": 0.1492,
"step": 14260
},
{
"epoch": 4.02,
"learning_rate": 1.9740289326802598e-06,
"loss": 0.2058,
"step": 14270
},
{
"epoch": 4.03,
"learning_rate": 1.968333523180317e-06,
"loss": 0.3785,
"step": 14280
},
{
"epoch": 4.03,
"learning_rate": 1.962638113680374e-06,
"loss": 0.3519,
"step": 14290
},
{
"epoch": 4.03,
"learning_rate": 1.9569427041804305e-06,
"loss": 0.4122,
"step": 14300
},
{
"epoch": 4.03,
"learning_rate": 1.951247294680488e-06,
"loss": 0.1842,
"step": 14310
},
{
"epoch": 4.04,
"learning_rate": 1.9455518851805446e-06,
"loss": 0.3497,
"step": 14320
},
{
"epoch": 4.04,
"learning_rate": 1.9398564756806016e-06,
"loss": 0.258,
"step": 14330
},
{
"epoch": 4.04,
"learning_rate": 1.9341610661806586e-06,
"loss": 0.2991,
"step": 14340
},
{
"epoch": 4.05,
"learning_rate": 1.9284656566807157e-06,
"loss": 0.3041,
"step": 14350
},
{
"epoch": 4.05,
"learning_rate": 1.9227702471807723e-06,
"loss": 0.3762,
"step": 14360
},
{
"epoch": 4.05,
"learning_rate": 1.9170748376808293e-06,
"loss": 0.2267,
"step": 14370
},
{
"epoch": 4.05,
"learning_rate": 1.9113794281808864e-06,
"loss": 0.3088,
"step": 14380
},
{
"epoch": 4.06,
"learning_rate": 1.9056840186809434e-06,
"loss": 0.163,
"step": 14390
},
{
"epoch": 4.06,
"learning_rate": 1.8999886091810002e-06,
"loss": 0.134,
"step": 14400
},
{
"epoch": 4.06,
"learning_rate": 1.8942931996810573e-06,
"loss": 0.3025,
"step": 14410
},
{
"epoch": 4.07,
"learning_rate": 1.8885977901811143e-06,
"loss": 0.3232,
"step": 14420
},
{
"epoch": 4.07,
"learning_rate": 1.8829023806811711e-06,
"loss": 0.4234,
"step": 14430
},
{
"epoch": 4.07,
"learning_rate": 1.877206971181228e-06,
"loss": 0.2133,
"step": 14440
},
{
"epoch": 4.07,
"learning_rate": 1.871511561681285e-06,
"loss": 0.5324,
"step": 14450
},
{
"epoch": 4.08,
"learning_rate": 1.865816152181342e-06,
"loss": 0.6641,
"step": 14460
},
{
"epoch": 4.08,
"learning_rate": 1.8601207426813989e-06,
"loss": 0.2977,
"step": 14470
},
{
"epoch": 4.08,
"learning_rate": 1.854425333181456e-06,
"loss": 0.3066,
"step": 14480
},
{
"epoch": 4.09,
"learning_rate": 1.848729923681513e-06,
"loss": 0.2809,
"step": 14490
},
{
"epoch": 4.09,
"learning_rate": 1.8430345141815698e-06,
"loss": 0.2115,
"step": 14500
},
{
"epoch": 4.09,
"eval_loss": 0.35345104336738586,
"eval_runtime": 211.5013,
"eval_samples_per_second": 9.555,
"eval_steps_per_second": 2.392,
"step": 14500
},
{
"epoch": 4.09,
"learning_rate": 1.8373391046816266e-06,
"loss": 0.4939,
"step": 14510
},
{
"epoch": 4.09,
"learning_rate": 1.8316436951816839e-06,
"loss": 0.2237,
"step": 14520
},
{
"epoch": 4.1,
"learning_rate": 1.8259482856817407e-06,
"loss": 0.0826,
"step": 14530
},
{
"epoch": 4.1,
"learning_rate": 1.8202528761817975e-06,
"loss": 0.2169,
"step": 14540
},
{
"epoch": 4.1,
"learning_rate": 1.8145574666818546e-06,
"loss": 0.6057,
"step": 14550
},
{
"epoch": 4.1,
"learning_rate": 1.8088620571819116e-06,
"loss": 0.2384,
"step": 14560
},
{
"epoch": 4.11,
"learning_rate": 1.8031666476819684e-06,
"loss": 0.2647,
"step": 14570
},
{
"epoch": 4.11,
"learning_rate": 1.7974712381820255e-06,
"loss": 0.2658,
"step": 14580
},
{
"epoch": 4.11,
"learning_rate": 1.7917758286820825e-06,
"loss": 0.2966,
"step": 14590
},
{
"epoch": 4.12,
"learning_rate": 1.7860804191821393e-06,
"loss": 0.3193,
"step": 14600
},
{
"epoch": 4.12,
"learning_rate": 1.7803850096821962e-06,
"loss": 0.3529,
"step": 14610
},
{
"epoch": 4.12,
"learning_rate": 1.7746896001822534e-06,
"loss": 0.3283,
"step": 14620
},
{
"epoch": 4.12,
"learning_rate": 1.7689941906823103e-06,
"loss": 0.2698,
"step": 14630
},
{
"epoch": 4.13,
"learning_rate": 1.763298781182367e-06,
"loss": 0.3715,
"step": 14640
},
{
"epoch": 4.13,
"learning_rate": 1.7576033716824241e-06,
"loss": 0.3999,
"step": 14650
},
{
"epoch": 4.13,
"learning_rate": 1.7519079621824812e-06,
"loss": 0.3378,
"step": 14660
},
{
"epoch": 4.14,
"learning_rate": 1.746212552682538e-06,
"loss": 0.215,
"step": 14670
},
{
"epoch": 4.14,
"learning_rate": 1.7405171431825948e-06,
"loss": 0.3081,
"step": 14680
},
{
"epoch": 4.14,
"learning_rate": 1.734821733682652e-06,
"loss": 0.2634,
"step": 14690
},
{
"epoch": 4.14,
"learning_rate": 1.729126324182709e-06,
"loss": 0.4724,
"step": 14700
},
{
"epoch": 4.15,
"learning_rate": 1.7234309146827657e-06,
"loss": 0.294,
"step": 14710
},
{
"epoch": 4.15,
"learning_rate": 1.717735505182823e-06,
"loss": 0.2399,
"step": 14720
},
{
"epoch": 4.15,
"learning_rate": 1.7120400956828798e-06,
"loss": 0.2102,
"step": 14730
},
{
"epoch": 4.16,
"learning_rate": 1.7063446861829366e-06,
"loss": 0.1503,
"step": 14740
},
{
"epoch": 4.16,
"learning_rate": 1.7006492766829935e-06,
"loss": 0.1694,
"step": 14750
},
{
"epoch": 4.16,
"learning_rate": 1.6949538671830507e-06,
"loss": 0.4448,
"step": 14760
},
{
"epoch": 4.16,
"learning_rate": 1.6892584576831075e-06,
"loss": 0.1987,
"step": 14770
},
{
"epoch": 4.17,
"learning_rate": 1.6835630481831644e-06,
"loss": 0.3097,
"step": 14780
},
{
"epoch": 4.17,
"learning_rate": 1.6778676386832216e-06,
"loss": 0.2567,
"step": 14790
},
{
"epoch": 4.17,
"learning_rate": 1.6721722291832785e-06,
"loss": 0.2016,
"step": 14800
},
{
"epoch": 4.18,
"learning_rate": 1.6664768196833353e-06,
"loss": 0.173,
"step": 14810
},
{
"epoch": 4.18,
"learning_rate": 1.6607814101833925e-06,
"loss": 0.2601,
"step": 14820
},
{
"epoch": 4.18,
"learning_rate": 1.6550860006834494e-06,
"loss": 0.2977,
"step": 14830
},
{
"epoch": 4.18,
"learning_rate": 1.6493905911835062e-06,
"loss": 0.2837,
"step": 14840
},
{
"epoch": 4.19,
"learning_rate": 1.643695181683563e-06,
"loss": 0.3155,
"step": 14850
},
{
"epoch": 4.19,
"learning_rate": 1.6379997721836203e-06,
"loss": 0.2774,
"step": 14860
},
{
"epoch": 4.19,
"learning_rate": 1.632304362683677e-06,
"loss": 0.0978,
"step": 14870
},
{
"epoch": 4.2,
"learning_rate": 1.626608953183734e-06,
"loss": 0.4131,
"step": 14880
},
{
"epoch": 4.2,
"learning_rate": 1.6209135436837912e-06,
"loss": 0.3686,
"step": 14890
},
{
"epoch": 4.2,
"learning_rate": 1.615218134183848e-06,
"loss": 0.0879,
"step": 14900
},
{
"epoch": 4.2,
"learning_rate": 1.6095227246839048e-06,
"loss": 0.401,
"step": 14910
},
{
"epoch": 4.21,
"learning_rate": 1.6038273151839617e-06,
"loss": 0.2656,
"step": 14920
},
{
"epoch": 4.21,
"learning_rate": 1.598131905684019e-06,
"loss": 0.3608,
"step": 14930
},
{
"epoch": 4.21,
"learning_rate": 1.5924364961840757e-06,
"loss": 0.3265,
"step": 14940
},
{
"epoch": 4.21,
"learning_rate": 1.5867410866841326e-06,
"loss": 0.372,
"step": 14950
},
{
"epoch": 4.22,
"learning_rate": 1.5810456771841898e-06,
"loss": 0.302,
"step": 14960
},
{
"epoch": 4.22,
"learning_rate": 1.5753502676842467e-06,
"loss": 0.2329,
"step": 14970
},
{
"epoch": 4.22,
"learning_rate": 1.5696548581843035e-06,
"loss": 0.1317,
"step": 14980
},
{
"epoch": 4.23,
"learning_rate": 1.5639594486843607e-06,
"loss": 0.5018,
"step": 14990
},
{
"epoch": 4.23,
"learning_rate": 1.5582640391844176e-06,
"loss": 0.4088,
"step": 15000
},
{
"epoch": 4.23,
"eval_loss": 0.3560781478881836,
"eval_runtime": 211.6609,
"eval_samples_per_second": 9.548,
"eval_steps_per_second": 2.391,
"step": 15000
},
{
"epoch": 4.23,
"learning_rate": 1.5525686296844744e-06,
"loss": 0.3594,
"step": 15010
},
{
"epoch": 4.23,
"learning_rate": 1.5468732201845312e-06,
"loss": 0.1288,
"step": 15020
},
{
"epoch": 4.24,
"learning_rate": 1.5411778106845885e-06,
"loss": 0.1063,
"step": 15030
},
{
"epoch": 4.24,
"learning_rate": 1.5354824011846453e-06,
"loss": 0.3444,
"step": 15040
},
{
"epoch": 4.24,
"learning_rate": 1.5297869916847021e-06,
"loss": 0.393,
"step": 15050
},
{
"epoch": 4.25,
"learning_rate": 1.5240915821847594e-06,
"loss": 0.3674,
"step": 15060
},
{
"epoch": 4.25,
"learning_rate": 1.5183961726848162e-06,
"loss": 0.2209,
"step": 15070
},
{
"epoch": 4.25,
"learning_rate": 1.512700763184873e-06,
"loss": 0.2758,
"step": 15080
},
{
"epoch": 4.25,
"learning_rate": 1.5070053536849299e-06,
"loss": 0.3258,
"step": 15090
},
{
"epoch": 4.26,
"learning_rate": 1.5013099441849871e-06,
"loss": 0.5878,
"step": 15100
},
{
"epoch": 4.26,
"learning_rate": 1.495614534685044e-06,
"loss": 0.2565,
"step": 15110
},
{
"epoch": 4.26,
"learning_rate": 1.4899191251851008e-06,
"loss": 0.1671,
"step": 15120
},
{
"epoch": 4.27,
"learning_rate": 1.484223715685158e-06,
"loss": 0.3276,
"step": 15130
},
{
"epoch": 4.27,
"learning_rate": 1.4785283061852149e-06,
"loss": 0.5138,
"step": 15140
},
{
"epoch": 4.27,
"learning_rate": 1.4728328966852717e-06,
"loss": 0.4189,
"step": 15150
},
{
"epoch": 4.27,
"learning_rate": 1.4671374871853285e-06,
"loss": 0.1441,
"step": 15160
},
{
"epoch": 4.28,
"learning_rate": 1.4614420776853858e-06,
"loss": 0.4461,
"step": 15170
},
{
"epoch": 4.28,
"learning_rate": 1.4557466681854426e-06,
"loss": 0.2924,
"step": 15180
},
{
"epoch": 4.28,
"learning_rate": 1.4500512586854994e-06,
"loss": 0.2742,
"step": 15190
},
{
"epoch": 4.29,
"learning_rate": 1.4443558491855567e-06,
"loss": 0.5659,
"step": 15200
},
{
"epoch": 4.29,
"learning_rate": 1.4386604396856135e-06,
"loss": 0.3907,
"step": 15210
},
{
"epoch": 4.29,
"learning_rate": 1.4329650301856703e-06,
"loss": 0.3276,
"step": 15220
},
{
"epoch": 4.29,
"learning_rate": 1.4272696206857276e-06,
"loss": 0.1779,
"step": 15230
},
{
"epoch": 4.3,
"learning_rate": 1.4215742111857844e-06,
"loss": 0.2407,
"step": 15240
},
{
"epoch": 4.3,
"learning_rate": 1.4158788016858412e-06,
"loss": 0.5414,
"step": 15250
},
{
"epoch": 4.3,
"learning_rate": 1.410183392185898e-06,
"loss": 0.213,
"step": 15260
},
{
"epoch": 4.31,
"learning_rate": 1.4044879826859553e-06,
"loss": 0.3669,
"step": 15270
},
{
"epoch": 4.31,
"learning_rate": 1.3987925731860122e-06,
"loss": 0.4115,
"step": 15280
},
{
"epoch": 4.31,
"learning_rate": 1.393097163686069e-06,
"loss": 0.352,
"step": 15290
},
{
"epoch": 4.31,
"learning_rate": 1.3874017541861262e-06,
"loss": 0.5727,
"step": 15300
},
{
"epoch": 4.32,
"learning_rate": 1.381706344686183e-06,
"loss": 0.3396,
"step": 15310
},
{
"epoch": 4.32,
"learning_rate": 1.3760109351862399e-06,
"loss": 0.2974,
"step": 15320
},
{
"epoch": 4.32,
"learning_rate": 1.370315525686297e-06,
"loss": 0.3642,
"step": 15330
},
{
"epoch": 4.32,
"learning_rate": 1.364620116186354e-06,
"loss": 0.2653,
"step": 15340
},
{
"epoch": 4.33,
"learning_rate": 1.3589247066864108e-06,
"loss": 0.3407,
"step": 15350
},
{
"epoch": 4.33,
"learning_rate": 1.3532292971864676e-06,
"loss": 0.3,
"step": 15360
},
{
"epoch": 4.33,
"learning_rate": 1.3475338876865249e-06,
"loss": 0.2095,
"step": 15370
},
{
"epoch": 4.34,
"learning_rate": 1.3418384781865817e-06,
"loss": 0.2906,
"step": 15380
},
{
"epoch": 4.34,
"learning_rate": 1.3361430686866385e-06,
"loss": 0.263,
"step": 15390
},
{
"epoch": 4.34,
"learning_rate": 1.3304476591866956e-06,
"loss": 0.2987,
"step": 15400
},
{
"epoch": 4.34,
"learning_rate": 1.3247522496867526e-06,
"loss": 0.3188,
"step": 15410
},
{
"epoch": 4.35,
"learning_rate": 1.3190568401868094e-06,
"loss": 0.6636,
"step": 15420
},
{
"epoch": 4.35,
"learning_rate": 1.3133614306868665e-06,
"loss": 0.4354,
"step": 15430
},
{
"epoch": 4.35,
"learning_rate": 1.3076660211869235e-06,
"loss": 0.2682,
"step": 15440
},
{
"epoch": 4.36,
"learning_rate": 1.3019706116869804e-06,
"loss": 0.4007,
"step": 15450
},
{
"epoch": 4.36,
"learning_rate": 1.2962752021870372e-06,
"loss": 0.4228,
"step": 15460
},
{
"epoch": 4.36,
"learning_rate": 1.2905797926870944e-06,
"loss": 0.2764,
"step": 15470
},
{
"epoch": 4.36,
"learning_rate": 1.2848843831871513e-06,
"loss": 0.1821,
"step": 15480
},
{
"epoch": 4.37,
"learning_rate": 1.279188973687208e-06,
"loss": 0.1724,
"step": 15490
},
{
"epoch": 4.37,
"learning_rate": 1.2734935641872651e-06,
"loss": 0.502,
"step": 15500
},
{
"epoch": 4.37,
"eval_loss": 0.3557915985584259,
"eval_runtime": 212.001,
"eval_samples_per_second": 9.533,
"eval_steps_per_second": 2.387,
"step": 15500
},
{
"epoch": 4.37,
"learning_rate": 1.2677981546873222e-06,
"loss": 0.4079,
"step": 15510
},
{
"epoch": 4.38,
"learning_rate": 1.262102745187379e-06,
"loss": 0.3792,
"step": 15520
},
{
"epoch": 4.38,
"learning_rate": 1.256407335687436e-06,
"loss": 0.2857,
"step": 15530
},
{
"epoch": 4.38,
"learning_rate": 1.250711926187493e-06,
"loss": 0.2627,
"step": 15540
},
{
"epoch": 4.38,
"learning_rate": 1.24501651668755e-06,
"loss": 0.3787,
"step": 15550
},
{
"epoch": 4.39,
"learning_rate": 1.239321107187607e-06,
"loss": 0.4017,
"step": 15560
},
{
"epoch": 4.39,
"learning_rate": 1.2336256976876638e-06,
"loss": 0.3776,
"step": 15570
},
{
"epoch": 4.39,
"learning_rate": 1.2279302881877208e-06,
"loss": 0.3595,
"step": 15580
},
{
"epoch": 4.4,
"learning_rate": 1.2222348786877776e-06,
"loss": 0.2288,
"step": 15590
},
{
"epoch": 4.4,
"learning_rate": 1.2165394691878347e-06,
"loss": 0.3921,
"step": 15600
},
{
"epoch": 4.4,
"learning_rate": 1.2108440596878917e-06,
"loss": 0.3526,
"step": 15610
},
{
"epoch": 4.4,
"learning_rate": 1.2051486501879486e-06,
"loss": 0.2754,
"step": 15620
},
{
"epoch": 4.41,
"learning_rate": 1.1994532406880056e-06,
"loss": 0.347,
"step": 15630
},
{
"epoch": 4.41,
"learning_rate": 1.1937578311880624e-06,
"loss": 0.2734,
"step": 15640
},
{
"epoch": 4.41,
"learning_rate": 1.1880624216881195e-06,
"loss": 0.3327,
"step": 15650
},
{
"epoch": 4.41,
"learning_rate": 1.1823670121881765e-06,
"loss": 0.3287,
"step": 15660
},
{
"epoch": 4.42,
"learning_rate": 1.1766716026882333e-06,
"loss": 0.3132,
"step": 15670
},
{
"epoch": 4.42,
"learning_rate": 1.1709761931882904e-06,
"loss": 0.4023,
"step": 15680
},
{
"epoch": 4.42,
"learning_rate": 1.1652807836883472e-06,
"loss": 0.3814,
"step": 15690
},
{
"epoch": 4.43,
"learning_rate": 1.1595853741884042e-06,
"loss": 0.4485,
"step": 15700
},
{
"epoch": 4.43,
"learning_rate": 1.1538899646884613e-06,
"loss": 0.286,
"step": 15710
},
{
"epoch": 4.43,
"learning_rate": 1.1481945551885181e-06,
"loss": 0.6182,
"step": 15720
},
{
"epoch": 4.43,
"learning_rate": 1.1424991456885751e-06,
"loss": 0.4019,
"step": 15730
},
{
"epoch": 4.44,
"learning_rate": 1.136803736188632e-06,
"loss": 0.3184,
"step": 15740
},
{
"epoch": 4.44,
"learning_rate": 1.131108326688689e-06,
"loss": 0.4947,
"step": 15750
},
{
"epoch": 4.44,
"learning_rate": 1.125412917188746e-06,
"loss": 0.4203,
"step": 15760
},
{
"epoch": 4.45,
"learning_rate": 1.1197175076888029e-06,
"loss": 0.1995,
"step": 15770
},
{
"epoch": 4.45,
"learning_rate": 1.11402209818886e-06,
"loss": 0.2659,
"step": 15780
},
{
"epoch": 4.45,
"learning_rate": 1.1083266886889168e-06,
"loss": 0.3046,
"step": 15790
},
{
"epoch": 4.45,
"learning_rate": 1.1026312791889738e-06,
"loss": 0.1809,
"step": 15800
},
{
"epoch": 4.46,
"learning_rate": 1.0969358696890308e-06,
"loss": 0.2639,
"step": 15810
},
{
"epoch": 4.46,
"learning_rate": 1.0912404601890877e-06,
"loss": 0.272,
"step": 15820
},
{
"epoch": 4.46,
"learning_rate": 1.0855450506891447e-06,
"loss": 0.6332,
"step": 15830
},
{
"epoch": 4.47,
"learning_rate": 1.0798496411892015e-06,
"loss": 0.3555,
"step": 15840
},
{
"epoch": 4.47,
"learning_rate": 1.0741542316892586e-06,
"loss": 0.271,
"step": 15850
},
{
"epoch": 4.47,
"learning_rate": 1.0684588221893156e-06,
"loss": 0.1772,
"step": 15860
},
{
"epoch": 4.47,
"learning_rate": 1.0627634126893724e-06,
"loss": 0.4972,
"step": 15870
},
{
"epoch": 4.48,
"learning_rate": 1.0570680031894295e-06,
"loss": 0.3808,
"step": 15880
},
{
"epoch": 4.48,
"learning_rate": 1.0513725936894863e-06,
"loss": 0.2823,
"step": 15890
},
{
"epoch": 4.48,
"learning_rate": 1.0456771841895433e-06,
"loss": 0.3016,
"step": 15900
},
{
"epoch": 4.49,
"learning_rate": 1.0399817746896004e-06,
"loss": 0.4662,
"step": 15910
},
{
"epoch": 4.49,
"learning_rate": 1.0342863651896572e-06,
"loss": 0.3594,
"step": 15920
},
{
"epoch": 4.49,
"learning_rate": 1.0285909556897143e-06,
"loss": 0.2838,
"step": 15930
},
{
"epoch": 4.49,
"learning_rate": 1.022895546189771e-06,
"loss": 0.3706,
"step": 15940
},
{
"epoch": 4.5,
"learning_rate": 1.0172001366898281e-06,
"loss": 0.4332,
"step": 15950
},
{
"epoch": 4.5,
"learning_rate": 1.0115047271898852e-06,
"loss": 0.2483,
"step": 15960
},
{
"epoch": 4.5,
"learning_rate": 1.005809317689942e-06,
"loss": 0.2208,
"step": 15970
},
{
"epoch": 4.51,
"learning_rate": 1.000113908189999e-06,
"loss": 0.3334,
"step": 15980
},
{
"epoch": 4.51,
"learning_rate": 9.944184986900559e-07,
"loss": 0.298,
"step": 15990
},
{
"epoch": 4.51,
"learning_rate": 9.88723089190113e-07,
"loss": 0.3099,
"step": 16000
},
{
"epoch": 4.51,
"eval_loss": 0.3557519316673279,
"eval_runtime": 212.3782,
"eval_samples_per_second": 9.516,
"eval_steps_per_second": 2.383,
"step": 16000
},
{
"epoch": 4.51,
"learning_rate": 9.8302767969017e-07,
"loss": 0.4163,
"step": 16010
},
{
"epoch": 4.52,
"learning_rate": 9.773322701902268e-07,
"loss": 0.1947,
"step": 16020
},
{
"epoch": 4.52,
"learning_rate": 9.716368606902838e-07,
"loss": 0.3316,
"step": 16030
},
{
"epoch": 4.52,
"learning_rate": 9.659414511903406e-07,
"loss": 0.1959,
"step": 16040
},
{
"epoch": 4.52,
"learning_rate": 9.602460416903977e-07,
"loss": 0.2092,
"step": 16050
},
{
"epoch": 4.53,
"learning_rate": 9.545506321904547e-07,
"loss": 0.1516,
"step": 16060
},
{
"epoch": 4.53,
"learning_rate": 9.488552226905115e-07,
"loss": 0.5363,
"step": 16070
},
{
"epoch": 4.53,
"learning_rate": 9.431598131905685e-07,
"loss": 0.3025,
"step": 16080
},
{
"epoch": 4.54,
"learning_rate": 9.374644036906254e-07,
"loss": 0.4779,
"step": 16090
},
{
"epoch": 4.54,
"learning_rate": 9.317689941906824e-07,
"loss": 0.277,
"step": 16100
},
{
"epoch": 4.54,
"learning_rate": 9.260735846907394e-07,
"loss": 0.1441,
"step": 16110
},
{
"epoch": 4.54,
"learning_rate": 9.203781751907963e-07,
"loss": 0.2157,
"step": 16120
},
{
"epoch": 4.55,
"learning_rate": 9.146827656908533e-07,
"loss": 0.2452,
"step": 16130
},
{
"epoch": 4.55,
"learning_rate": 9.089873561909102e-07,
"loss": 0.4405,
"step": 16140
},
{
"epoch": 4.55,
"learning_rate": 9.032919466909671e-07,
"loss": 0.4546,
"step": 16150
},
{
"epoch": 4.56,
"learning_rate": 8.975965371910241e-07,
"loss": 0.4222,
"step": 16160
},
{
"epoch": 4.56,
"learning_rate": 8.919011276910811e-07,
"loss": 0.4103,
"step": 16170
},
{
"epoch": 4.56,
"learning_rate": 8.86205718191138e-07,
"loss": 0.522,
"step": 16180
},
{
"epoch": 4.56,
"learning_rate": 8.80510308691195e-07,
"loss": 0.4269,
"step": 16190
},
{
"epoch": 4.57,
"learning_rate": 8.748148991912519e-07,
"loss": 0.3538,
"step": 16200
},
{
"epoch": 4.57,
"learning_rate": 8.691194896913088e-07,
"loss": 0.3338,
"step": 16210
},
{
"epoch": 4.57,
"learning_rate": 8.634240801913659e-07,
"loss": 0.2906,
"step": 16220
},
{
"epoch": 4.58,
"learning_rate": 8.577286706914228e-07,
"loss": 0.3005,
"step": 16230
},
{
"epoch": 4.58,
"learning_rate": 8.520332611914797e-07,
"loss": 0.5027,
"step": 16240
},
{
"epoch": 4.58,
"learning_rate": 8.463378516915367e-07,
"loss": 0.255,
"step": 16250
},
{
"epoch": 4.58,
"learning_rate": 8.406424421915936e-07,
"loss": 0.1749,
"step": 16260
},
{
"epoch": 4.59,
"learning_rate": 8.349470326916507e-07,
"loss": 0.2787,
"step": 16270
},
{
"epoch": 4.59,
"learning_rate": 8.292516231917075e-07,
"loss": 0.2366,
"step": 16280
},
{
"epoch": 4.59,
"learning_rate": 8.235562136917645e-07,
"loss": 0.2687,
"step": 16290
},
{
"epoch": 4.6,
"learning_rate": 8.178608041918215e-07,
"loss": 0.4187,
"step": 16300
},
{
"epoch": 4.6,
"learning_rate": 8.121653946918784e-07,
"loss": 0.1763,
"step": 16310
},
{
"epoch": 4.6,
"learning_rate": 8.064699851919354e-07,
"loss": 0.377,
"step": 16320
},
{
"epoch": 4.6,
"learning_rate": 8.007745756919923e-07,
"loss": 0.5291,
"step": 16330
},
{
"epoch": 4.61,
"learning_rate": 7.950791661920493e-07,
"loss": 0.3513,
"step": 16340
},
{
"epoch": 4.61,
"learning_rate": 7.893837566921062e-07,
"loss": 0.2492,
"step": 16350
},
{
"epoch": 4.61,
"learning_rate": 7.836883471921632e-07,
"loss": 0.4625,
"step": 16360
},
{
"epoch": 4.62,
"learning_rate": 7.779929376922202e-07,
"loss": 0.2957,
"step": 16370
},
{
"epoch": 4.62,
"learning_rate": 7.72297528192277e-07,
"loss": 0.3181,
"step": 16380
},
{
"epoch": 4.62,
"learning_rate": 7.666021186923341e-07,
"loss": 0.1754,
"step": 16390
},
{
"epoch": 4.62,
"learning_rate": 7.609067091923909e-07,
"loss": 0.4674,
"step": 16400
},
{
"epoch": 4.63,
"learning_rate": 7.55211299692448e-07,
"loss": 0.3187,
"step": 16410
},
{
"epoch": 4.63,
"learning_rate": 7.49515890192505e-07,
"loss": 0.5942,
"step": 16420
},
{
"epoch": 4.63,
"learning_rate": 7.438204806925618e-07,
"loss": 0.3195,
"step": 16430
},
{
"epoch": 4.63,
"learning_rate": 7.381250711926189e-07,
"loss": 0.5229,
"step": 16440
},
{
"epoch": 4.64,
"learning_rate": 7.324296616926757e-07,
"loss": 0.4654,
"step": 16450
},
{
"epoch": 4.64,
"learning_rate": 7.267342521927327e-07,
"loss": 0.4792,
"step": 16460
},
{
"epoch": 4.64,
"learning_rate": 7.210388426927898e-07,
"loss": 0.2923,
"step": 16470
},
{
"epoch": 4.65,
"learning_rate": 7.153434331928466e-07,
"loss": 0.2751,
"step": 16480
},
{
"epoch": 4.65,
"learning_rate": 7.096480236929036e-07,
"loss": 0.1897,
"step": 16490
},
{
"epoch": 4.65,
"learning_rate": 7.039526141929605e-07,
"loss": 0.2381,
"step": 16500
},
{
"epoch": 4.65,
"eval_loss": 0.3569630980491638,
"eval_runtime": 212.316,
"eval_samples_per_second": 9.519,
"eval_steps_per_second": 2.383,
"step": 16500
},
{
"epoch": 4.65,
"learning_rate": 6.982572046930175e-07,
"loss": 0.4211,
"step": 16510
},
{
"epoch": 4.66,
"learning_rate": 6.925617951930743e-07,
"loss": 0.2632,
"step": 16520
},
{
"epoch": 4.66,
"learning_rate": 6.868663856931314e-07,
"loss": 0.3168,
"step": 16530
},
{
"epoch": 4.66,
"learning_rate": 6.811709761931884e-07,
"loss": 0.1798,
"step": 16540
},
{
"epoch": 4.67,
"learning_rate": 6.754755666932452e-07,
"loss": 0.2793,
"step": 16550
},
{
"epoch": 4.67,
"learning_rate": 6.697801571933023e-07,
"loss": 0.2896,
"step": 16560
},
{
"epoch": 4.67,
"learning_rate": 6.640847476933591e-07,
"loss": 0.4018,
"step": 16570
},
{
"epoch": 4.67,
"learning_rate": 6.583893381934162e-07,
"loss": 0.4488,
"step": 16580
},
{
"epoch": 4.68,
"learning_rate": 6.526939286934732e-07,
"loss": 0.1678,
"step": 16590
},
{
"epoch": 4.68,
"learning_rate": 6.4699851919353e-07,
"loss": 0.2788,
"step": 16600
},
{
"epoch": 4.68,
"learning_rate": 6.413031096935871e-07,
"loss": 0.2512,
"step": 16610
},
{
"epoch": 4.69,
"learning_rate": 6.356077001936439e-07,
"loss": 0.297,
"step": 16620
},
{
"epoch": 4.69,
"learning_rate": 6.299122906937009e-07,
"loss": 0.1882,
"step": 16630
},
{
"epoch": 4.69,
"learning_rate": 6.242168811937579e-07,
"loss": 0.4612,
"step": 16640
},
{
"epoch": 4.69,
"learning_rate": 6.185214716938148e-07,
"loss": 0.3111,
"step": 16650
},
{
"epoch": 4.7,
"learning_rate": 6.128260621938717e-07,
"loss": 0.3121,
"step": 16660
},
{
"epoch": 4.7,
"learning_rate": 6.071306526939288e-07,
"loss": 0.2923,
"step": 16670
},
{
"epoch": 4.7,
"learning_rate": 6.014352431939857e-07,
"loss": 0.3701,
"step": 16680
},
{
"epoch": 4.71,
"learning_rate": 5.957398336940426e-07,
"loss": 0.2431,
"step": 16690
},
{
"epoch": 4.71,
"learning_rate": 5.900444241940996e-07,
"loss": 0.3444,
"step": 16700
},
{
"epoch": 4.71,
"learning_rate": 5.843490146941565e-07,
"loss": 0.2769,
"step": 16710
},
{
"epoch": 4.71,
"learning_rate": 5.786536051942134e-07,
"loss": 0.1501,
"step": 16720
},
{
"epoch": 4.72,
"learning_rate": 5.729581956942705e-07,
"loss": 0.3458,
"step": 16730
},
{
"epoch": 4.72,
"learning_rate": 5.672627861943274e-07,
"loss": 0.3527,
"step": 16740
},
{
"epoch": 4.72,
"learning_rate": 5.615673766943844e-07,
"loss": 0.3205,
"step": 16750
},
{
"epoch": 4.73,
"learning_rate": 5.558719671944413e-07,
"loss": 0.2322,
"step": 16760
},
{
"epoch": 4.73,
"learning_rate": 5.501765576944982e-07,
"loss": 0.583,
"step": 16770
},
{
"epoch": 4.73,
"learning_rate": 5.444811481945552e-07,
"loss": 0.3078,
"step": 16780
},
{
"epoch": 4.73,
"learning_rate": 5.387857386946122e-07,
"loss": 0.2509,
"step": 16790
},
{
"epoch": 4.74,
"learning_rate": 5.330903291946691e-07,
"loss": 0.2672,
"step": 16800
},
{
"epoch": 4.74,
"learning_rate": 5.273949196947261e-07,
"loss": 0.4086,
"step": 16810
},
{
"epoch": 4.74,
"learning_rate": 5.21699510194783e-07,
"loss": 0.4899,
"step": 16820
},
{
"epoch": 4.74,
"learning_rate": 5.1600410069484e-07,
"loss": 0.2831,
"step": 16830
},
{
"epoch": 4.75,
"learning_rate": 5.10308691194897e-07,
"loss": 0.3935,
"step": 16840
},
{
"epoch": 4.75,
"learning_rate": 5.046132816949539e-07,
"loss": 0.3414,
"step": 16850
},
{
"epoch": 4.75,
"learning_rate": 4.989178721950108e-07,
"loss": 0.2679,
"step": 16860
},
{
"epoch": 4.76,
"learning_rate": 4.932224626950678e-07,
"loss": 0.313,
"step": 16870
},
{
"epoch": 4.76,
"learning_rate": 4.875270531951248e-07,
"loss": 0.3272,
"step": 16880
},
{
"epoch": 4.76,
"learning_rate": 4.818316436951817e-07,
"loss": 0.321,
"step": 16890
},
{
"epoch": 4.76,
"learning_rate": 4.7613623419523863e-07,
"loss": 0.4763,
"step": 16900
},
{
"epoch": 4.77,
"learning_rate": 4.7044082469529567e-07,
"loss": 0.2577,
"step": 16910
},
{
"epoch": 4.77,
"learning_rate": 4.647454151953526e-07,
"loss": 0.2298,
"step": 16920
},
{
"epoch": 4.77,
"learning_rate": 4.5905000569540954e-07,
"loss": 0.3451,
"step": 16930
},
{
"epoch": 4.78,
"learning_rate": 4.533545961954665e-07,
"loss": 0.2389,
"step": 16940
},
{
"epoch": 4.78,
"learning_rate": 4.476591866955234e-07,
"loss": 0.3936,
"step": 16950
},
{
"epoch": 4.78,
"learning_rate": 4.4196377719558034e-07,
"loss": 0.3509,
"step": 16960
},
{
"epoch": 4.78,
"learning_rate": 4.362683676956374e-07,
"loss": 0.2643,
"step": 16970
},
{
"epoch": 4.79,
"learning_rate": 4.305729581956943e-07,
"loss": 0.541,
"step": 16980
},
{
"epoch": 4.79,
"learning_rate": 4.2487754869575125e-07,
"loss": 0.1942,
"step": 16990
},
{
"epoch": 4.79,
"learning_rate": 4.191821391958082e-07,
"loss": 0.3614,
"step": 17000
},
{
"epoch": 4.79,
"eval_loss": 0.3553633391857147,
"eval_runtime": 211.9344,
"eval_samples_per_second": 9.536,
"eval_steps_per_second": 2.388,
"step": 17000
},
{
"epoch": 4.8,
"learning_rate": 4.1348672969586517e-07,
"loss": 0.3707,
"step": 17010
},
{
"epoch": 4.8,
"learning_rate": 4.077913201959221e-07,
"loss": 0.3282,
"step": 17020
},
{
"epoch": 4.8,
"learning_rate": 4.020959106959791e-07,
"loss": 0.51,
"step": 17030
},
{
"epoch": 4.8,
"learning_rate": 3.9640050119603603e-07,
"loss": 0.1609,
"step": 17040
},
{
"epoch": 4.81,
"learning_rate": 3.9070509169609296e-07,
"loss": 0.3832,
"step": 17050
},
{
"epoch": 4.81,
"learning_rate": 3.8500968219614995e-07,
"loss": 0.2903,
"step": 17060
},
{
"epoch": 4.81,
"learning_rate": 3.793142726962069e-07,
"loss": 0.2686,
"step": 17070
},
{
"epoch": 4.82,
"learning_rate": 3.736188631962638e-07,
"loss": 0.2917,
"step": 17080
},
{
"epoch": 4.82,
"learning_rate": 3.679234536963208e-07,
"loss": 0.265,
"step": 17090
},
{
"epoch": 4.82,
"learning_rate": 3.6222804419637774e-07,
"loss": 0.3747,
"step": 17100
},
{
"epoch": 4.82,
"learning_rate": 3.5653263469643473e-07,
"loss": 0.5041,
"step": 17110
},
{
"epoch": 4.83,
"learning_rate": 3.5083722519649166e-07,
"loss": 0.2131,
"step": 17120
},
{
"epoch": 4.83,
"learning_rate": 3.451418156965486e-07,
"loss": 0.346,
"step": 17130
},
{
"epoch": 4.83,
"learning_rate": 3.3944640619660553e-07,
"loss": 0.1843,
"step": 17140
},
{
"epoch": 4.84,
"learning_rate": 3.337509966966625e-07,
"loss": 0.4283,
"step": 17150
},
{
"epoch": 4.84,
"learning_rate": 3.280555871967195e-07,
"loss": 0.4339,
"step": 17160
},
{
"epoch": 4.84,
"learning_rate": 3.2236017769677644e-07,
"loss": 0.2597,
"step": 17170
},
{
"epoch": 4.84,
"learning_rate": 3.1666476819683337e-07,
"loss": 0.3388,
"step": 17180
},
{
"epoch": 4.85,
"learning_rate": 3.1096935869689036e-07,
"loss": 0.4598,
"step": 17190
},
{
"epoch": 4.85,
"learning_rate": 3.052739491969473e-07,
"loss": 0.3224,
"step": 17200
},
{
"epoch": 4.85,
"learning_rate": 2.9957853969700423e-07,
"loss": 0.3763,
"step": 17210
},
{
"epoch": 4.85,
"learning_rate": 2.938831301970612e-07,
"loss": 0.3208,
"step": 17220
},
{
"epoch": 4.86,
"learning_rate": 2.8818772069711815e-07,
"loss": 0.1648,
"step": 17230
},
{
"epoch": 4.86,
"learning_rate": 2.824923111971751e-07,
"loss": 0.2525,
"step": 17240
},
{
"epoch": 4.86,
"learning_rate": 2.7679690169723207e-07,
"loss": 0.5624,
"step": 17250
},
{
"epoch": 4.87,
"learning_rate": 2.71101492197289e-07,
"loss": 0.4129,
"step": 17260
},
{
"epoch": 4.87,
"learning_rate": 2.6540608269734594e-07,
"loss": 0.2936,
"step": 17270
},
{
"epoch": 4.87,
"learning_rate": 2.5971067319740293e-07,
"loss": 0.1014,
"step": 17280
},
{
"epoch": 4.87,
"learning_rate": 2.5401526369745986e-07,
"loss": 0.5215,
"step": 17290
},
{
"epoch": 4.88,
"learning_rate": 2.483198541975168e-07,
"loss": 0.3502,
"step": 17300
},
{
"epoch": 4.88,
"learning_rate": 2.426244446975738e-07,
"loss": 0.4451,
"step": 17310
},
{
"epoch": 4.88,
"learning_rate": 2.3692903519763072e-07,
"loss": 0.3947,
"step": 17320
},
{
"epoch": 4.89,
"learning_rate": 2.3123362569768768e-07,
"loss": 0.3667,
"step": 17330
},
{
"epoch": 4.89,
"learning_rate": 2.2553821619774464e-07,
"loss": 0.3662,
"step": 17340
},
{
"epoch": 4.89,
"learning_rate": 2.1984280669780157e-07,
"loss": 0.2084,
"step": 17350
},
{
"epoch": 4.89,
"learning_rate": 2.1414739719785853e-07,
"loss": 0.5334,
"step": 17360
},
{
"epoch": 4.9,
"learning_rate": 2.084519876979155e-07,
"loss": 0.3224,
"step": 17370
},
{
"epoch": 4.9,
"learning_rate": 2.0275657819797246e-07,
"loss": 0.2063,
"step": 17380
},
{
"epoch": 4.9,
"learning_rate": 1.970611686980294e-07,
"loss": 0.2368,
"step": 17390
},
{
"epoch": 4.91,
"learning_rate": 1.9136575919808638e-07,
"loss": 0.3701,
"step": 17400
},
{
"epoch": 4.91,
"learning_rate": 1.856703496981433e-07,
"loss": 0.2256,
"step": 17410
},
{
"epoch": 4.91,
"learning_rate": 1.7997494019820025e-07,
"loss": 0.2116,
"step": 17420
},
{
"epoch": 4.91,
"learning_rate": 1.7427953069825723e-07,
"loss": 0.4188,
"step": 17430
},
{
"epoch": 4.92,
"learning_rate": 1.6858412119831417e-07,
"loss": 0.3854,
"step": 17440
},
{
"epoch": 4.92,
"learning_rate": 1.628887116983711e-07,
"loss": 0.2101,
"step": 17450
},
{
"epoch": 4.92,
"learning_rate": 1.571933021984281e-07,
"loss": 0.3551,
"step": 17460
},
{
"epoch": 4.93,
"learning_rate": 1.5149789269848502e-07,
"loss": 0.3388,
"step": 17470
},
{
"epoch": 4.93,
"learning_rate": 1.4580248319854198e-07,
"loss": 0.478,
"step": 17480
},
{
"epoch": 4.93,
"learning_rate": 1.4010707369859895e-07,
"loss": 0.1387,
"step": 17490
},
{
"epoch": 4.93,
"learning_rate": 1.3441166419865588e-07,
"loss": 0.5615,
"step": 17500
},
{
"epoch": 4.93,
"eval_loss": 0.35653403401374817,
"eval_runtime": 212.1272,
"eval_samples_per_second": 9.527,
"eval_steps_per_second": 2.385,
"step": 17500
}
],
"logging_steps": 10,
"max_steps": 17735,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 666056317861888.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}