facebookgalactica-1.3b / trainer_state.json
sxx123's picture
Upload 11 files
c83f2b2
raw
history blame
57.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 9475,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.995778364116095e-05,
"loss": 0.7213,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 1.99155672823219e-05,
"loss": 0.6524,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 1.987335092348285e-05,
"loss": 0.645,
"step": 60
},
{
"epoch": 0.01,
"learning_rate": 1.98311345646438e-05,
"loss": 0.6331,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 1.978891820580475e-05,
"loss": 0.8327,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 1.97467018469657e-05,
"loss": 0.6765,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 1.970448548812665e-05,
"loss": 0.8367,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 1.96622691292876e-05,
"loss": 0.6278,
"step": 160
},
{
"epoch": 0.02,
"learning_rate": 1.962005277044855e-05,
"loss": 0.6081,
"step": 180
},
{
"epoch": 0.02,
"learning_rate": 1.95778364116095e-05,
"loss": 0.6156,
"step": 200
},
{
"epoch": 0.02,
"learning_rate": 1.953562005277045e-05,
"loss": 0.5935,
"step": 220
},
{
"epoch": 0.03,
"learning_rate": 1.94934036939314e-05,
"loss": 0.4673,
"step": 240
},
{
"epoch": 0.03,
"learning_rate": 1.945118733509235e-05,
"loss": 0.7047,
"step": 260
},
{
"epoch": 0.03,
"learning_rate": 1.94089709762533e-05,
"loss": 0.371,
"step": 280
},
{
"epoch": 0.03,
"learning_rate": 1.9366754617414248e-05,
"loss": 0.4462,
"step": 300
},
{
"epoch": 0.03,
"learning_rate": 1.93245382585752e-05,
"loss": 0.4483,
"step": 320
},
{
"epoch": 0.04,
"learning_rate": 1.9282321899736148e-05,
"loss": 0.6366,
"step": 340
},
{
"epoch": 0.04,
"learning_rate": 1.92401055408971e-05,
"loss": 0.5771,
"step": 360
},
{
"epoch": 0.04,
"learning_rate": 1.919788918205805e-05,
"loss": 0.6417,
"step": 380
},
{
"epoch": 0.04,
"learning_rate": 1.9155672823219e-05,
"loss": 0.5874,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 1.911345646437995e-05,
"loss": 0.7817,
"step": 420
},
{
"epoch": 0.05,
"learning_rate": 1.90712401055409e-05,
"loss": 0.5381,
"step": 440
},
{
"epoch": 0.05,
"learning_rate": 1.902902374670185e-05,
"loss": 0.6428,
"step": 460
},
{
"epoch": 0.05,
"learning_rate": 1.89868073878628e-05,
"loss": 0.6244,
"step": 480
},
{
"epoch": 0.05,
"learning_rate": 1.894459102902375e-05,
"loss": 0.5345,
"step": 500
},
{
"epoch": 0.05,
"learning_rate": 1.89023746701847e-05,
"loss": 0.6411,
"step": 520
},
{
"epoch": 0.06,
"learning_rate": 1.886015831134565e-05,
"loss": 0.6699,
"step": 540
},
{
"epoch": 0.06,
"learning_rate": 1.8817941952506598e-05,
"loss": 0.5649,
"step": 560
},
{
"epoch": 0.06,
"learning_rate": 1.877572559366755e-05,
"loss": 0.4107,
"step": 580
},
{
"epoch": 0.06,
"learning_rate": 1.8733509234828498e-05,
"loss": 0.3438,
"step": 600
},
{
"epoch": 0.07,
"learning_rate": 1.869129287598945e-05,
"loss": 0.5882,
"step": 620
},
{
"epoch": 0.07,
"learning_rate": 1.8649076517150398e-05,
"loss": 0.5149,
"step": 640
},
{
"epoch": 0.07,
"learning_rate": 1.8606860158311346e-05,
"loss": 0.4901,
"step": 660
},
{
"epoch": 0.07,
"learning_rate": 1.8564643799472297e-05,
"loss": 0.6303,
"step": 680
},
{
"epoch": 0.07,
"learning_rate": 1.8522427440633246e-05,
"loss": 0.5993,
"step": 700
},
{
"epoch": 0.08,
"learning_rate": 1.8480211081794197e-05,
"loss": 0.5937,
"step": 720
},
{
"epoch": 0.08,
"learning_rate": 1.8437994722955145e-05,
"loss": 0.6804,
"step": 740
},
{
"epoch": 0.08,
"learning_rate": 1.8395778364116097e-05,
"loss": 0.6918,
"step": 760
},
{
"epoch": 0.08,
"learning_rate": 1.8353562005277045e-05,
"loss": 0.648,
"step": 780
},
{
"epoch": 0.08,
"learning_rate": 1.8311345646437997e-05,
"loss": 0.4871,
"step": 800
},
{
"epoch": 0.09,
"learning_rate": 1.8269129287598945e-05,
"loss": 0.6124,
"step": 820
},
{
"epoch": 0.09,
"learning_rate": 1.8226912928759896e-05,
"loss": 0.6574,
"step": 840
},
{
"epoch": 0.09,
"learning_rate": 1.8184696569920845e-05,
"loss": 0.4374,
"step": 860
},
{
"epoch": 0.09,
"learning_rate": 1.8142480211081796e-05,
"loss": 0.6513,
"step": 880
},
{
"epoch": 0.09,
"learning_rate": 1.8100263852242744e-05,
"loss": 0.7598,
"step": 900
},
{
"epoch": 0.1,
"learning_rate": 1.8058047493403696e-05,
"loss": 0.6105,
"step": 920
},
{
"epoch": 0.1,
"learning_rate": 1.8015831134564644e-05,
"loss": 0.6589,
"step": 940
},
{
"epoch": 0.1,
"learning_rate": 1.7973614775725596e-05,
"loss": 0.4167,
"step": 960
},
{
"epoch": 0.1,
"learning_rate": 1.7931398416886544e-05,
"loss": 0.5617,
"step": 980
},
{
"epoch": 0.11,
"learning_rate": 1.7889182058047495e-05,
"loss": 0.4674,
"step": 1000
},
{
"epoch": 0.11,
"learning_rate": 1.7846965699208444e-05,
"loss": 0.7633,
"step": 1020
},
{
"epoch": 0.11,
"learning_rate": 1.7804749340369395e-05,
"loss": 0.5497,
"step": 1040
},
{
"epoch": 0.11,
"learning_rate": 1.7762532981530343e-05,
"loss": 0.3297,
"step": 1060
},
{
"epoch": 0.11,
"learning_rate": 1.7720316622691295e-05,
"loss": 0.5251,
"step": 1080
},
{
"epoch": 0.12,
"learning_rate": 1.7678100263852246e-05,
"loss": 0.6771,
"step": 1100
},
{
"epoch": 0.12,
"learning_rate": 1.7635883905013195e-05,
"loss": 0.6371,
"step": 1120
},
{
"epoch": 0.12,
"learning_rate": 1.7593667546174146e-05,
"loss": 0.5774,
"step": 1140
},
{
"epoch": 0.12,
"learning_rate": 1.7551451187335094e-05,
"loss": 0.5764,
"step": 1160
},
{
"epoch": 0.12,
"learning_rate": 1.7509234828496046e-05,
"loss": 0.5529,
"step": 1180
},
{
"epoch": 0.13,
"learning_rate": 1.7467018469656994e-05,
"loss": 0.6113,
"step": 1200
},
{
"epoch": 0.13,
"learning_rate": 1.7424802110817946e-05,
"loss": 0.5255,
"step": 1220
},
{
"epoch": 0.13,
"learning_rate": 1.7382585751978894e-05,
"loss": 0.4578,
"step": 1240
},
{
"epoch": 0.13,
"learning_rate": 1.7340369393139842e-05,
"loss": 0.4262,
"step": 1260
},
{
"epoch": 0.14,
"learning_rate": 1.7298153034300794e-05,
"loss": 0.5446,
"step": 1280
},
{
"epoch": 0.14,
"learning_rate": 1.7255936675461742e-05,
"loss": 0.5746,
"step": 1300
},
{
"epoch": 0.14,
"learning_rate": 1.7213720316622693e-05,
"loss": 0.6115,
"step": 1320
},
{
"epoch": 0.14,
"learning_rate": 1.717150395778364e-05,
"loss": 0.5768,
"step": 1340
},
{
"epoch": 0.14,
"learning_rate": 1.7129287598944593e-05,
"loss": 0.5883,
"step": 1360
},
{
"epoch": 0.15,
"learning_rate": 1.708707124010554e-05,
"loss": 0.4796,
"step": 1380
},
{
"epoch": 0.15,
"learning_rate": 1.7044854881266493e-05,
"loss": 0.3795,
"step": 1400
},
{
"epoch": 0.15,
"learning_rate": 1.700263852242744e-05,
"loss": 0.4755,
"step": 1420
},
{
"epoch": 0.15,
"learning_rate": 1.6960422163588393e-05,
"loss": 0.3378,
"step": 1440
},
{
"epoch": 0.15,
"learning_rate": 1.691820580474934e-05,
"loss": 0.55,
"step": 1460
},
{
"epoch": 0.16,
"learning_rate": 1.6875989445910292e-05,
"loss": 0.6011,
"step": 1480
},
{
"epoch": 0.16,
"learning_rate": 1.683377308707124e-05,
"loss": 0.8145,
"step": 1500
},
{
"epoch": 0.16,
"learning_rate": 1.6791556728232192e-05,
"loss": 0.5853,
"step": 1520
},
{
"epoch": 0.16,
"learning_rate": 1.674934036939314e-05,
"loss": 0.4712,
"step": 1540
},
{
"epoch": 0.16,
"learning_rate": 1.6707124010554092e-05,
"loss": 0.6111,
"step": 1560
},
{
"epoch": 0.17,
"learning_rate": 1.666490765171504e-05,
"loss": 0.7256,
"step": 1580
},
{
"epoch": 0.17,
"learning_rate": 1.6622691292875988e-05,
"loss": 0.6924,
"step": 1600
},
{
"epoch": 0.17,
"learning_rate": 1.658047493403694e-05,
"loss": 0.3561,
"step": 1620
},
{
"epoch": 0.17,
"learning_rate": 1.653825857519789e-05,
"loss": 0.9116,
"step": 1640
},
{
"epoch": 0.18,
"learning_rate": 1.649604221635884e-05,
"loss": 0.5273,
"step": 1660
},
{
"epoch": 0.18,
"learning_rate": 1.645382585751979e-05,
"loss": 0.6573,
"step": 1680
},
{
"epoch": 0.18,
"learning_rate": 1.641160949868074e-05,
"loss": 0.537,
"step": 1700
},
{
"epoch": 0.18,
"learning_rate": 1.636939313984169e-05,
"loss": 0.4107,
"step": 1720
},
{
"epoch": 0.18,
"learning_rate": 1.632717678100264e-05,
"loss": 0.5239,
"step": 1740
},
{
"epoch": 0.19,
"learning_rate": 1.628496042216359e-05,
"loss": 0.4254,
"step": 1760
},
{
"epoch": 0.19,
"learning_rate": 1.624274406332454e-05,
"loss": 0.7679,
"step": 1780
},
{
"epoch": 0.19,
"learning_rate": 1.620052770448549e-05,
"loss": 0.5916,
"step": 1800
},
{
"epoch": 0.19,
"learning_rate": 1.6158311345646442e-05,
"loss": 0.4871,
"step": 1820
},
{
"epoch": 0.19,
"learning_rate": 1.611609498680739e-05,
"loss": 0.354,
"step": 1840
},
{
"epoch": 0.2,
"learning_rate": 1.607387862796834e-05,
"loss": 0.7787,
"step": 1860
},
{
"epoch": 0.2,
"learning_rate": 1.603166226912929e-05,
"loss": 0.5968,
"step": 1880
},
{
"epoch": 0.2,
"learning_rate": 1.5989445910290238e-05,
"loss": 0.6694,
"step": 1900
},
{
"epoch": 0.2,
"learning_rate": 1.594722955145119e-05,
"loss": 0.6532,
"step": 1920
},
{
"epoch": 0.2,
"learning_rate": 1.5905013192612138e-05,
"loss": 0.6289,
"step": 1940
},
{
"epoch": 0.21,
"learning_rate": 1.586279683377309e-05,
"loss": 0.3906,
"step": 1960
},
{
"epoch": 0.21,
"learning_rate": 1.5820580474934038e-05,
"loss": 0.5025,
"step": 1980
},
{
"epoch": 0.21,
"learning_rate": 1.577836411609499e-05,
"loss": 0.7357,
"step": 2000
},
{
"epoch": 0.21,
"learning_rate": 1.5736147757255937e-05,
"loss": 0.5729,
"step": 2020
},
{
"epoch": 0.22,
"learning_rate": 1.569393139841689e-05,
"loss": 0.4374,
"step": 2040
},
{
"epoch": 0.22,
"learning_rate": 1.5651715039577837e-05,
"loss": 0.5097,
"step": 2060
},
{
"epoch": 0.22,
"learning_rate": 1.560949868073879e-05,
"loss": 0.5712,
"step": 2080
},
{
"epoch": 0.22,
"learning_rate": 1.5567282321899737e-05,
"loss": 0.6074,
"step": 2100
},
{
"epoch": 0.22,
"learning_rate": 1.552506596306069e-05,
"loss": 0.3627,
"step": 2120
},
{
"epoch": 0.23,
"learning_rate": 1.5482849604221637e-05,
"loss": 0.448,
"step": 2140
},
{
"epoch": 0.23,
"learning_rate": 1.5440633245382588e-05,
"loss": 0.5785,
"step": 2160
},
{
"epoch": 0.23,
"learning_rate": 1.5398416886543536e-05,
"loss": 0.4907,
"step": 2180
},
{
"epoch": 0.23,
"learning_rate": 1.5356200527704484e-05,
"loss": 0.6272,
"step": 2200
},
{
"epoch": 0.23,
"learning_rate": 1.5313984168865436e-05,
"loss": 0.527,
"step": 2220
},
{
"epoch": 0.24,
"learning_rate": 1.5271767810026384e-05,
"loss": 0.6151,
"step": 2240
},
{
"epoch": 0.24,
"learning_rate": 1.5229551451187336e-05,
"loss": 0.37,
"step": 2260
},
{
"epoch": 0.24,
"learning_rate": 1.5187335092348286e-05,
"loss": 0.4636,
"step": 2280
},
{
"epoch": 0.24,
"learning_rate": 1.5145118733509237e-05,
"loss": 0.5352,
"step": 2300
},
{
"epoch": 0.24,
"learning_rate": 1.5102902374670185e-05,
"loss": 0.7749,
"step": 2320
},
{
"epoch": 0.25,
"learning_rate": 1.5060686015831137e-05,
"loss": 0.6404,
"step": 2340
},
{
"epoch": 0.25,
"learning_rate": 1.5018469656992085e-05,
"loss": 0.5735,
"step": 2360
},
{
"epoch": 0.25,
"learning_rate": 1.4976253298153037e-05,
"loss": 0.5506,
"step": 2380
},
{
"epoch": 0.25,
"learning_rate": 1.4934036939313985e-05,
"loss": 0.5916,
"step": 2400
},
{
"epoch": 0.26,
"learning_rate": 1.4891820580474936e-05,
"loss": 0.4593,
"step": 2420
},
{
"epoch": 0.26,
"learning_rate": 1.4849604221635885e-05,
"loss": 0.5974,
"step": 2440
},
{
"epoch": 0.26,
"learning_rate": 1.4807387862796836e-05,
"loss": 0.8557,
"step": 2460
},
{
"epoch": 0.26,
"learning_rate": 1.4765171503957784e-05,
"loss": 0.6341,
"step": 2480
},
{
"epoch": 0.26,
"learning_rate": 1.4722955145118736e-05,
"loss": 0.5251,
"step": 2500
},
{
"epoch": 0.27,
"learning_rate": 1.4680738786279684e-05,
"loss": 0.5603,
"step": 2520
},
{
"epoch": 0.27,
"learning_rate": 1.4638522427440634e-05,
"loss": 0.5832,
"step": 2540
},
{
"epoch": 0.27,
"learning_rate": 1.4596306068601584e-05,
"loss": 0.5408,
"step": 2560
},
{
"epoch": 0.27,
"learning_rate": 1.4554089709762534e-05,
"loss": 0.5024,
"step": 2580
},
{
"epoch": 0.27,
"learning_rate": 1.4511873350923484e-05,
"loss": 0.5729,
"step": 2600
},
{
"epoch": 0.28,
"learning_rate": 1.4469656992084433e-05,
"loss": 0.6356,
"step": 2620
},
{
"epoch": 0.28,
"learning_rate": 1.4427440633245385e-05,
"loss": 0.3953,
"step": 2640
},
{
"epoch": 0.28,
"learning_rate": 1.4385224274406333e-05,
"loss": 0.634,
"step": 2660
},
{
"epoch": 0.28,
"learning_rate": 1.4343007915567285e-05,
"loss": 0.4937,
"step": 2680
},
{
"epoch": 0.28,
"learning_rate": 1.4300791556728233e-05,
"loss": 0.5123,
"step": 2700
},
{
"epoch": 0.29,
"learning_rate": 1.4258575197889185e-05,
"loss": 0.5106,
"step": 2720
},
{
"epoch": 0.29,
"learning_rate": 1.4216358839050133e-05,
"loss": 0.634,
"step": 2740
},
{
"epoch": 0.29,
"learning_rate": 1.4174142480211084e-05,
"loss": 0.7666,
"step": 2760
},
{
"epoch": 0.29,
"learning_rate": 1.4131926121372032e-05,
"loss": 0.5169,
"step": 2780
},
{
"epoch": 0.3,
"learning_rate": 1.4089709762532984e-05,
"loss": 0.7099,
"step": 2800
},
{
"epoch": 0.3,
"learning_rate": 1.4047493403693932e-05,
"loss": 0.6799,
"step": 2820
},
{
"epoch": 0.3,
"learning_rate": 1.4005277044854882e-05,
"loss": 0.5833,
"step": 2840
},
{
"epoch": 0.3,
"learning_rate": 1.3963060686015832e-05,
"loss": 0.528,
"step": 2860
},
{
"epoch": 0.3,
"learning_rate": 1.3920844327176782e-05,
"loss": 0.8601,
"step": 2880
},
{
"epoch": 0.31,
"learning_rate": 1.3878627968337732e-05,
"loss": 0.6472,
"step": 2900
},
{
"epoch": 0.31,
"learning_rate": 1.3836411609498682e-05,
"loss": 0.6743,
"step": 2920
},
{
"epoch": 0.31,
"learning_rate": 1.3794195250659631e-05,
"loss": 0.5751,
"step": 2940
},
{
"epoch": 0.31,
"learning_rate": 1.3751978891820581e-05,
"loss": 0.5818,
"step": 2960
},
{
"epoch": 0.31,
"learning_rate": 1.3709762532981531e-05,
"loss": 0.6772,
"step": 2980
},
{
"epoch": 0.32,
"learning_rate": 1.3667546174142481e-05,
"loss": 0.6443,
"step": 3000
},
{
"epoch": 0.32,
"learning_rate": 1.3625329815303433e-05,
"loss": 0.7561,
"step": 3020
},
{
"epoch": 0.32,
"learning_rate": 1.3583113456464381e-05,
"loss": 0.5368,
"step": 3040
},
{
"epoch": 0.32,
"learning_rate": 1.3540897097625332e-05,
"loss": 0.5932,
"step": 3060
},
{
"epoch": 0.33,
"learning_rate": 1.349868073878628e-05,
"loss": 0.646,
"step": 3080
},
{
"epoch": 0.33,
"learning_rate": 1.3456464379947232e-05,
"loss": 0.5333,
"step": 3100
},
{
"epoch": 0.33,
"learning_rate": 1.341424802110818e-05,
"loss": 0.5377,
"step": 3120
},
{
"epoch": 0.33,
"learning_rate": 1.3372031662269129e-05,
"loss": 0.3827,
"step": 3140
},
{
"epoch": 0.33,
"learning_rate": 1.332981530343008e-05,
"loss": 0.4472,
"step": 3160
},
{
"epoch": 0.34,
"learning_rate": 1.328759894459103e-05,
"loss": 0.6726,
"step": 3180
},
{
"epoch": 0.34,
"learning_rate": 1.324538258575198e-05,
"loss": 0.5824,
"step": 3200
},
{
"epoch": 0.34,
"learning_rate": 1.320316622691293e-05,
"loss": 0.6857,
"step": 3220
},
{
"epoch": 0.34,
"learning_rate": 1.316094986807388e-05,
"loss": 0.4948,
"step": 3240
},
{
"epoch": 0.34,
"learning_rate": 1.311873350923483e-05,
"loss": 0.4676,
"step": 3260
},
{
"epoch": 0.35,
"learning_rate": 1.307651715039578e-05,
"loss": 0.4412,
"step": 3280
},
{
"epoch": 0.35,
"learning_rate": 1.303430079155673e-05,
"loss": 0.5293,
"step": 3300
},
{
"epoch": 0.35,
"learning_rate": 1.2992084432717679e-05,
"loss": 0.6052,
"step": 3320
},
{
"epoch": 0.35,
"learning_rate": 1.2949868073878629e-05,
"loss": 0.5418,
"step": 3340
},
{
"epoch": 0.35,
"learning_rate": 1.290765171503958e-05,
"loss": 0.6428,
"step": 3360
},
{
"epoch": 0.36,
"learning_rate": 1.2865435356200529e-05,
"loss": 0.6619,
"step": 3380
},
{
"epoch": 0.36,
"learning_rate": 1.282321899736148e-05,
"loss": 0.5869,
"step": 3400
},
{
"epoch": 0.36,
"learning_rate": 1.2781002638522428e-05,
"loss": 0.6139,
"step": 3420
},
{
"epoch": 0.36,
"learning_rate": 1.273878627968338e-05,
"loss": 0.5202,
"step": 3440
},
{
"epoch": 0.37,
"learning_rate": 1.2696569920844328e-05,
"loss": 0.472,
"step": 3460
},
{
"epoch": 0.37,
"learning_rate": 1.2654353562005276e-05,
"loss": 0.6936,
"step": 3480
},
{
"epoch": 0.37,
"learning_rate": 1.2612137203166228e-05,
"loss": 0.5366,
"step": 3500
},
{
"epoch": 0.37,
"learning_rate": 1.2569920844327176e-05,
"loss": 0.6306,
"step": 3520
},
{
"epoch": 0.37,
"learning_rate": 1.2527704485488128e-05,
"loss": 0.6323,
"step": 3540
},
{
"epoch": 0.38,
"learning_rate": 1.2485488126649078e-05,
"loss": 0.5825,
"step": 3560
},
{
"epoch": 0.38,
"learning_rate": 1.2443271767810027e-05,
"loss": 0.5203,
"step": 3580
},
{
"epoch": 0.38,
"learning_rate": 1.2401055408970977e-05,
"loss": 0.461,
"step": 3600
},
{
"epoch": 0.38,
"learning_rate": 1.2358839050131927e-05,
"loss": 0.4318,
"step": 3620
},
{
"epoch": 0.38,
"learning_rate": 1.2316622691292877e-05,
"loss": 0.6699,
"step": 3640
},
{
"epoch": 0.39,
"learning_rate": 1.2274406332453827e-05,
"loss": 0.4229,
"step": 3660
},
{
"epoch": 0.39,
"learning_rate": 1.2232189973614777e-05,
"loss": 0.4691,
"step": 3680
},
{
"epoch": 0.39,
"learning_rate": 1.2189973614775727e-05,
"loss": 0.5152,
"step": 3700
},
{
"epoch": 0.39,
"learning_rate": 1.2147757255936677e-05,
"loss": 0.4918,
"step": 3720
},
{
"epoch": 0.39,
"learning_rate": 1.2105540897097628e-05,
"loss": 0.7893,
"step": 3740
},
{
"epoch": 0.4,
"learning_rate": 1.2063324538258576e-05,
"loss": 0.6167,
"step": 3760
},
{
"epoch": 0.4,
"learning_rate": 1.2021108179419525e-05,
"loss": 0.4979,
"step": 3780
},
{
"epoch": 0.4,
"learning_rate": 1.1978891820580476e-05,
"loss": 0.5982,
"step": 3800
},
{
"epoch": 0.4,
"learning_rate": 1.1936675461741424e-05,
"loss": 0.5404,
"step": 3820
},
{
"epoch": 0.41,
"learning_rate": 1.1894459102902376e-05,
"loss": 0.5102,
"step": 3840
},
{
"epoch": 0.41,
"learning_rate": 1.1852242744063324e-05,
"loss": 0.7537,
"step": 3860
},
{
"epoch": 0.41,
"learning_rate": 1.1810026385224276e-05,
"loss": 0.5364,
"step": 3880
},
{
"epoch": 0.41,
"learning_rate": 1.1767810026385225e-05,
"loss": 0.5699,
"step": 3900
},
{
"epoch": 0.41,
"learning_rate": 1.1725593667546175e-05,
"loss": 0.6618,
"step": 3920
},
{
"epoch": 0.42,
"learning_rate": 1.1683377308707125e-05,
"loss": 0.457,
"step": 3940
},
{
"epoch": 0.42,
"learning_rate": 1.1641160949868075e-05,
"loss": 0.3489,
"step": 3960
},
{
"epoch": 0.42,
"learning_rate": 1.1598944591029025e-05,
"loss": 0.2943,
"step": 3980
},
{
"epoch": 0.42,
"learning_rate": 1.1556728232189975e-05,
"loss": 0.4931,
"step": 4000
},
{
"epoch": 0.42,
"learning_rate": 1.1514511873350925e-05,
"loss": 0.5655,
"step": 4020
},
{
"epoch": 0.43,
"learning_rate": 1.1472295514511875e-05,
"loss": 0.514,
"step": 4040
},
{
"epoch": 0.43,
"learning_rate": 1.1430079155672824e-05,
"loss": 0.3397,
"step": 4060
},
{
"epoch": 0.43,
"learning_rate": 1.1387862796833773e-05,
"loss": 0.524,
"step": 4080
},
{
"epoch": 0.43,
"learning_rate": 1.1345646437994724e-05,
"loss": 0.6505,
"step": 4100
},
{
"epoch": 0.43,
"learning_rate": 1.1303430079155672e-05,
"loss": 0.425,
"step": 4120
},
{
"epoch": 0.44,
"learning_rate": 1.1261213720316624e-05,
"loss": 0.62,
"step": 4140
},
{
"epoch": 0.44,
"learning_rate": 1.1218997361477572e-05,
"loss": 0.5354,
"step": 4160
},
{
"epoch": 0.44,
"learning_rate": 1.1176781002638524e-05,
"loss": 0.6713,
"step": 4180
},
{
"epoch": 0.44,
"learning_rate": 1.1134564643799472e-05,
"loss": 0.4955,
"step": 4200
},
{
"epoch": 0.45,
"learning_rate": 1.1092348284960423e-05,
"loss": 0.5697,
"step": 4220
},
{
"epoch": 0.45,
"learning_rate": 1.1050131926121372e-05,
"loss": 0.5844,
"step": 4240
},
{
"epoch": 0.45,
"learning_rate": 1.1007915567282323e-05,
"loss": 0.7378,
"step": 4260
},
{
"epoch": 0.45,
"learning_rate": 1.0965699208443273e-05,
"loss": 0.5467,
"step": 4280
},
{
"epoch": 0.45,
"learning_rate": 1.0923482849604223e-05,
"loss": 0.4497,
"step": 4300
},
{
"epoch": 0.46,
"learning_rate": 1.0881266490765173e-05,
"loss": 0.6415,
"step": 4320
},
{
"epoch": 0.46,
"learning_rate": 1.0839050131926123e-05,
"loss": 0.3529,
"step": 4340
},
{
"epoch": 0.46,
"learning_rate": 1.0796833773087073e-05,
"loss": 0.5863,
"step": 4360
},
{
"epoch": 0.46,
"learning_rate": 1.0754617414248022e-05,
"loss": 0.6605,
"step": 4380
},
{
"epoch": 0.46,
"learning_rate": 1.0712401055408972e-05,
"loss": 0.5001,
"step": 4400
},
{
"epoch": 0.47,
"learning_rate": 1.067018469656992e-05,
"loss": 0.5911,
"step": 4420
},
{
"epoch": 0.47,
"learning_rate": 1.0627968337730872e-05,
"loss": 0.4878,
"step": 4440
},
{
"epoch": 0.47,
"learning_rate": 1.058575197889182e-05,
"loss": 0.3502,
"step": 4460
},
{
"epoch": 0.47,
"learning_rate": 1.0543535620052772e-05,
"loss": 0.5124,
"step": 4480
},
{
"epoch": 0.47,
"learning_rate": 1.050131926121372e-05,
"loss": 0.6404,
"step": 4500
},
{
"epoch": 0.48,
"learning_rate": 1.0459102902374672e-05,
"loss": 0.5595,
"step": 4520
},
{
"epoch": 0.48,
"learning_rate": 1.041688654353562e-05,
"loss": 0.4514,
"step": 4540
},
{
"epoch": 0.48,
"learning_rate": 1.0374670184696571e-05,
"loss": 0.3171,
"step": 4560
},
{
"epoch": 0.48,
"learning_rate": 1.033245382585752e-05,
"loss": 0.1881,
"step": 4580
},
{
"epoch": 0.49,
"learning_rate": 1.0290237467018471e-05,
"loss": 0.6278,
"step": 4600
},
{
"epoch": 0.49,
"learning_rate": 1.024802110817942e-05,
"loss": 0.6495,
"step": 4620
},
{
"epoch": 0.49,
"learning_rate": 1.020580474934037e-05,
"loss": 0.2513,
"step": 4640
},
{
"epoch": 0.49,
"learning_rate": 1.016358839050132e-05,
"loss": 0.5532,
"step": 4660
},
{
"epoch": 0.49,
"learning_rate": 1.012137203166227e-05,
"loss": 0.5993,
"step": 4680
},
{
"epoch": 0.5,
"learning_rate": 1.007915567282322e-05,
"loss": 0.4503,
"step": 4700
},
{
"epoch": 0.5,
"learning_rate": 1.0036939313984169e-05,
"loss": 0.4031,
"step": 4720
},
{
"epoch": 0.5,
"learning_rate": 9.99472295514512e-06,
"loss": 0.7682,
"step": 4740
},
{
"epoch": 0.5,
"learning_rate": 9.95250659630607e-06,
"loss": 0.7421,
"step": 4760
},
{
"epoch": 0.5,
"learning_rate": 9.91029023746702e-06,
"loss": 0.4206,
"step": 4780
},
{
"epoch": 0.51,
"learning_rate": 9.86807387862797e-06,
"loss": 0.4319,
"step": 4800
},
{
"epoch": 0.51,
"learning_rate": 9.82585751978892e-06,
"loss": 0.5304,
"step": 4820
},
{
"epoch": 0.51,
"learning_rate": 9.78364116094987e-06,
"loss": 0.6692,
"step": 4840
},
{
"epoch": 0.51,
"learning_rate": 9.741424802110818e-06,
"loss": 0.595,
"step": 4860
},
{
"epoch": 0.52,
"learning_rate": 9.699208443271768e-06,
"loss": 0.496,
"step": 4880
},
{
"epoch": 0.52,
"learning_rate": 9.656992084432717e-06,
"loss": 0.6153,
"step": 4900
},
{
"epoch": 0.52,
"learning_rate": 9.614775725593667e-06,
"loss": 0.3291,
"step": 4920
},
{
"epoch": 0.52,
"learning_rate": 9.572559366754617e-06,
"loss": 0.5457,
"step": 4940
},
{
"epoch": 0.52,
"learning_rate": 9.530343007915567e-06,
"loss": 0.5569,
"step": 4960
},
{
"epoch": 0.53,
"learning_rate": 9.488126649076517e-06,
"loss": 0.6754,
"step": 4980
},
{
"epoch": 0.53,
"learning_rate": 9.445910290237469e-06,
"loss": 0.5545,
"step": 5000
},
{
"epoch": 0.53,
"learning_rate": 9.403693931398418e-06,
"loss": 0.6471,
"step": 5020
},
{
"epoch": 0.53,
"learning_rate": 9.361477572559368e-06,
"loss": 0.5456,
"step": 5040
},
{
"epoch": 0.53,
"learning_rate": 9.319261213720318e-06,
"loss": 0.617,
"step": 5060
},
{
"epoch": 0.54,
"learning_rate": 9.277044854881268e-06,
"loss": 0.4465,
"step": 5080
},
{
"epoch": 0.54,
"learning_rate": 9.234828496042218e-06,
"loss": 0.5831,
"step": 5100
},
{
"epoch": 0.54,
"learning_rate": 9.192612137203168e-06,
"loss": 0.465,
"step": 5120
},
{
"epoch": 0.54,
"learning_rate": 9.150395778364118e-06,
"loss": 0.5952,
"step": 5140
},
{
"epoch": 0.54,
"learning_rate": 9.108179419525068e-06,
"loss": 0.5419,
"step": 5160
},
{
"epoch": 0.55,
"learning_rate": 9.065963060686016e-06,
"loss": 0.4287,
"step": 5180
},
{
"epoch": 0.55,
"learning_rate": 9.023746701846966e-06,
"loss": 0.5648,
"step": 5200
},
{
"epoch": 0.55,
"learning_rate": 8.981530343007915e-06,
"loss": 0.6586,
"step": 5220
},
{
"epoch": 0.55,
"learning_rate": 8.939313984168865e-06,
"loss": 0.6909,
"step": 5240
},
{
"epoch": 0.56,
"learning_rate": 8.897097625329815e-06,
"loss": 0.7338,
"step": 5260
},
{
"epoch": 0.56,
"learning_rate": 8.854881266490765e-06,
"loss": 0.6468,
"step": 5280
},
{
"epoch": 0.56,
"learning_rate": 8.812664907651715e-06,
"loss": 0.4928,
"step": 5300
},
{
"epoch": 0.56,
"learning_rate": 8.770448548812665e-06,
"loss": 0.5157,
"step": 5320
},
{
"epoch": 0.56,
"learning_rate": 8.728232189973615e-06,
"loss": 0.4733,
"step": 5340
},
{
"epoch": 0.57,
"learning_rate": 8.686015831134566e-06,
"loss": 0.6383,
"step": 5360
},
{
"epoch": 0.57,
"learning_rate": 8.643799472295516e-06,
"loss": 0.4141,
"step": 5380
},
{
"epoch": 0.57,
"learning_rate": 8.601583113456466e-06,
"loss": 0.5483,
"step": 5400
},
{
"epoch": 0.57,
"learning_rate": 8.559366754617416e-06,
"loss": 0.4199,
"step": 5420
},
{
"epoch": 0.57,
"learning_rate": 8.517150395778366e-06,
"loss": 0.5284,
"step": 5440
},
{
"epoch": 0.58,
"learning_rate": 8.474934036939316e-06,
"loss": 0.5165,
"step": 5460
},
{
"epoch": 0.58,
"learning_rate": 8.432717678100266e-06,
"loss": 0.5744,
"step": 5480
},
{
"epoch": 0.58,
"learning_rate": 8.390501319261214e-06,
"loss": 0.5308,
"step": 5500
},
{
"epoch": 0.58,
"learning_rate": 8.348284960422164e-06,
"loss": 0.5786,
"step": 5520
},
{
"epoch": 0.58,
"learning_rate": 8.306068601583113e-06,
"loss": 0.5103,
"step": 5540
},
{
"epoch": 0.59,
"learning_rate": 8.263852242744063e-06,
"loss": 0.3575,
"step": 5560
},
{
"epoch": 0.59,
"learning_rate": 8.221635883905013e-06,
"loss": 0.6229,
"step": 5580
},
{
"epoch": 0.59,
"learning_rate": 8.179419525065963e-06,
"loss": 0.6278,
"step": 5600
},
{
"epoch": 0.59,
"learning_rate": 8.137203166226913e-06,
"loss": 0.714,
"step": 5620
},
{
"epoch": 0.6,
"learning_rate": 8.094986807387863e-06,
"loss": 0.425,
"step": 5640
},
{
"epoch": 0.6,
"learning_rate": 8.052770448548813e-06,
"loss": 0.4628,
"step": 5660
},
{
"epoch": 0.6,
"learning_rate": 8.010554089709763e-06,
"loss": 0.5427,
"step": 5680
},
{
"epoch": 0.6,
"learning_rate": 7.968337730870712e-06,
"loss": 0.5701,
"step": 5700
},
{
"epoch": 0.6,
"learning_rate": 7.926121372031664e-06,
"loss": 0.4103,
"step": 5720
},
{
"epoch": 0.61,
"learning_rate": 7.883905013192614e-06,
"loss": 0.3837,
"step": 5740
},
{
"epoch": 0.61,
"learning_rate": 7.841688654353564e-06,
"loss": 0.6341,
"step": 5760
},
{
"epoch": 0.61,
"learning_rate": 7.799472295514514e-06,
"loss": 0.3624,
"step": 5780
},
{
"epoch": 0.61,
"learning_rate": 7.757255936675462e-06,
"loss": 0.4841,
"step": 5800
},
{
"epoch": 0.61,
"learning_rate": 7.715039577836412e-06,
"loss": 0.4588,
"step": 5820
},
{
"epoch": 0.62,
"learning_rate": 7.672823218997362e-06,
"loss": 0.3238,
"step": 5840
},
{
"epoch": 0.62,
"learning_rate": 7.630606860158311e-06,
"loss": 0.4156,
"step": 5860
},
{
"epoch": 0.62,
"learning_rate": 7.588390501319261e-06,
"loss": 0.556,
"step": 5880
},
{
"epoch": 0.62,
"learning_rate": 7.546174142480211e-06,
"loss": 0.3624,
"step": 5900
},
{
"epoch": 0.62,
"learning_rate": 7.503957783641161e-06,
"loss": 0.6702,
"step": 5920
},
{
"epoch": 0.63,
"learning_rate": 7.461741424802112e-06,
"loss": 0.6105,
"step": 5940
},
{
"epoch": 0.63,
"learning_rate": 7.419525065963062e-06,
"loss": 0.3351,
"step": 5960
},
{
"epoch": 0.63,
"learning_rate": 7.3773087071240116e-06,
"loss": 0.5021,
"step": 5980
},
{
"epoch": 0.63,
"learning_rate": 7.3350923482849614e-06,
"loss": 0.6574,
"step": 6000
},
{
"epoch": 0.64,
"learning_rate": 7.292875989445911e-06,
"loss": 0.3253,
"step": 6020
},
{
"epoch": 0.64,
"learning_rate": 7.250659630606861e-06,
"loss": 0.6144,
"step": 6040
},
{
"epoch": 0.64,
"learning_rate": 7.208443271767811e-06,
"loss": 0.6338,
"step": 6060
},
{
"epoch": 0.64,
"learning_rate": 7.166226912928761e-06,
"loss": 0.5613,
"step": 6080
},
{
"epoch": 0.64,
"learning_rate": 7.124010554089711e-06,
"loss": 0.4417,
"step": 6100
},
{
"epoch": 0.65,
"learning_rate": 7.08179419525066e-06,
"loss": 0.6518,
"step": 6120
},
{
"epoch": 0.65,
"learning_rate": 7.03957783641161e-06,
"loss": 0.4236,
"step": 6140
},
{
"epoch": 0.65,
"learning_rate": 6.99736147757256e-06,
"loss": 0.7198,
"step": 6160
},
{
"epoch": 0.65,
"learning_rate": 6.9551451187335095e-06,
"loss": 0.5412,
"step": 6180
},
{
"epoch": 0.65,
"learning_rate": 6.912928759894459e-06,
"loss": 0.4579,
"step": 6200
},
{
"epoch": 0.66,
"learning_rate": 6.870712401055409e-06,
"loss": 0.5445,
"step": 6220
},
{
"epoch": 0.66,
"learning_rate": 6.828496042216359e-06,
"loss": 0.3513,
"step": 6240
},
{
"epoch": 0.66,
"learning_rate": 6.786279683377309e-06,
"loss": 0.4794,
"step": 6260
},
{
"epoch": 0.66,
"learning_rate": 6.744063324538259e-06,
"loss": 0.48,
"step": 6280
},
{
"epoch": 0.66,
"learning_rate": 6.701846965699209e-06,
"loss": 0.6672,
"step": 6300
},
{
"epoch": 0.67,
"learning_rate": 6.659630606860159e-06,
"loss": 0.3798,
"step": 6320
},
{
"epoch": 0.67,
"learning_rate": 6.617414248021109e-06,
"loss": 0.3502,
"step": 6340
},
{
"epoch": 0.67,
"learning_rate": 6.575197889182059e-06,
"loss": 0.5835,
"step": 6360
},
{
"epoch": 0.67,
"learning_rate": 6.532981530343009e-06,
"loss": 0.5271,
"step": 6380
},
{
"epoch": 0.68,
"learning_rate": 6.490765171503959e-06,
"loss": 0.4799,
"step": 6400
},
{
"epoch": 0.68,
"learning_rate": 6.448548812664909e-06,
"loss": 0.5692,
"step": 6420
},
{
"epoch": 0.68,
"learning_rate": 6.406332453825858e-06,
"loss": 0.5402,
"step": 6440
},
{
"epoch": 0.68,
"learning_rate": 6.364116094986808e-06,
"loss": 0.4828,
"step": 6460
},
{
"epoch": 0.68,
"learning_rate": 6.3218997361477576e-06,
"loss": 0.515,
"step": 6480
},
{
"epoch": 0.69,
"learning_rate": 6.2796833773087074e-06,
"loss": 0.5592,
"step": 6500
},
{
"epoch": 0.69,
"learning_rate": 6.237467018469657e-06,
"loss": 0.6036,
"step": 6520
},
{
"epoch": 0.69,
"learning_rate": 6.195250659630607e-06,
"loss": 0.6602,
"step": 6540
},
{
"epoch": 0.69,
"learning_rate": 6.153034300791557e-06,
"loss": 0.6557,
"step": 6560
},
{
"epoch": 0.69,
"learning_rate": 6.110817941952507e-06,
"loss": 0.6216,
"step": 6580
},
{
"epoch": 0.7,
"learning_rate": 6.068601583113457e-06,
"loss": 0.4562,
"step": 6600
},
{
"epoch": 0.7,
"learning_rate": 6.026385224274407e-06,
"loss": 0.6253,
"step": 6620
},
{
"epoch": 0.7,
"learning_rate": 5.9841688654353566e-06,
"loss": 0.4678,
"step": 6640
},
{
"epoch": 0.7,
"learning_rate": 5.9419525065963064e-06,
"loss": 0.7542,
"step": 6660
},
{
"epoch": 0.71,
"learning_rate": 5.899736147757257e-06,
"loss": 0.5984,
"step": 6680
},
{
"epoch": 0.71,
"learning_rate": 5.857519788918207e-06,
"loss": 0.4195,
"step": 6700
},
{
"epoch": 0.71,
"learning_rate": 5.815303430079157e-06,
"loss": 0.6876,
"step": 6720
},
{
"epoch": 0.71,
"learning_rate": 5.773087071240105e-06,
"loss": 0.486,
"step": 6740
},
{
"epoch": 0.71,
"learning_rate": 5.730870712401056e-06,
"loss": 0.706,
"step": 6760
},
{
"epoch": 0.72,
"learning_rate": 5.688654353562006e-06,
"loss": 0.5317,
"step": 6780
},
{
"epoch": 0.72,
"learning_rate": 5.6464379947229556e-06,
"loss": 0.4366,
"step": 6800
},
{
"epoch": 0.72,
"learning_rate": 5.6042216358839054e-06,
"loss": 0.3125,
"step": 6820
},
{
"epoch": 0.72,
"learning_rate": 5.562005277044855e-06,
"loss": 0.4181,
"step": 6840
},
{
"epoch": 0.72,
"learning_rate": 5.519788918205805e-06,
"loss": 0.6685,
"step": 6860
},
{
"epoch": 0.73,
"learning_rate": 5.477572559366755e-06,
"loss": 0.4767,
"step": 6880
},
{
"epoch": 0.73,
"learning_rate": 5.435356200527705e-06,
"loss": 0.5124,
"step": 6900
},
{
"epoch": 0.73,
"learning_rate": 5.393139841688655e-06,
"loss": 0.3873,
"step": 6920
},
{
"epoch": 0.73,
"learning_rate": 5.350923482849605e-06,
"loss": 0.6609,
"step": 6940
},
{
"epoch": 0.73,
"learning_rate": 5.3087071240105546e-06,
"loss": 0.5962,
"step": 6960
},
{
"epoch": 0.74,
"learning_rate": 5.2664907651715044e-06,
"loss": 0.4593,
"step": 6980
},
{
"epoch": 0.74,
"learning_rate": 5.224274406332454e-06,
"loss": 0.3752,
"step": 7000
},
{
"epoch": 0.74,
"learning_rate": 5.182058047493404e-06,
"loss": 0.6012,
"step": 7020
},
{
"epoch": 0.74,
"learning_rate": 5.139841688654355e-06,
"loss": 0.3253,
"step": 7040
},
{
"epoch": 0.75,
"learning_rate": 5.097625329815303e-06,
"loss": 0.2852,
"step": 7060
},
{
"epoch": 0.75,
"learning_rate": 5.055408970976253e-06,
"loss": 0.4875,
"step": 7080
},
{
"epoch": 0.75,
"learning_rate": 5.013192612137203e-06,
"loss": 0.6634,
"step": 7100
},
{
"epoch": 0.75,
"learning_rate": 4.9709762532981536e-06,
"loss": 0.5397,
"step": 7120
},
{
"epoch": 0.75,
"learning_rate": 4.9287598944591034e-06,
"loss": 0.4543,
"step": 7140
},
{
"epoch": 0.76,
"learning_rate": 4.886543535620053e-06,
"loss": 0.4,
"step": 7160
},
{
"epoch": 0.76,
"learning_rate": 4.844327176781003e-06,
"loss": 0.6291,
"step": 7180
},
{
"epoch": 0.76,
"learning_rate": 4.802110817941953e-06,
"loss": 0.5296,
"step": 7200
},
{
"epoch": 0.76,
"learning_rate": 4.759894459102903e-06,
"loss": 0.6181,
"step": 7220
},
{
"epoch": 0.76,
"learning_rate": 4.717678100263853e-06,
"loss": 0.4267,
"step": 7240
},
{
"epoch": 0.77,
"learning_rate": 4.675461741424803e-06,
"loss": 0.55,
"step": 7260
},
{
"epoch": 0.77,
"learning_rate": 4.6332453825857525e-06,
"loss": 0.5503,
"step": 7280
},
{
"epoch": 0.77,
"learning_rate": 4.5910290237467024e-06,
"loss": 0.65,
"step": 7300
},
{
"epoch": 0.77,
"learning_rate": 4.548812664907652e-06,
"loss": 0.5358,
"step": 7320
},
{
"epoch": 0.77,
"learning_rate": 4.506596306068602e-06,
"loss": 0.5237,
"step": 7340
},
{
"epoch": 0.78,
"learning_rate": 4.464379947229552e-06,
"loss": 0.7353,
"step": 7360
},
{
"epoch": 0.78,
"learning_rate": 4.422163588390502e-06,
"loss": 0.454,
"step": 7380
},
{
"epoch": 0.78,
"learning_rate": 4.379947229551452e-06,
"loss": 0.508,
"step": 7400
},
{
"epoch": 0.78,
"learning_rate": 4.337730870712402e-06,
"loss": 0.6651,
"step": 7420
},
{
"epoch": 0.79,
"learning_rate": 4.295514511873351e-06,
"loss": 0.449,
"step": 7440
},
{
"epoch": 0.79,
"learning_rate": 4.2532981530343006e-06,
"loss": 0.6682,
"step": 7460
},
{
"epoch": 0.79,
"learning_rate": 4.211081794195251e-06,
"loss": 0.4796,
"step": 7480
},
{
"epoch": 0.79,
"learning_rate": 4.168865435356201e-06,
"loss": 0.4207,
"step": 7500
},
{
"epoch": 0.79,
"learning_rate": 4.126649076517151e-06,
"loss": 0.4697,
"step": 7520
},
{
"epoch": 0.8,
"learning_rate": 4.084432717678101e-06,
"loss": 0.4785,
"step": 7540
},
{
"epoch": 0.8,
"learning_rate": 4.042216358839051e-06,
"loss": 0.4247,
"step": 7560
},
{
"epoch": 0.8,
"learning_rate": 4.000000000000001e-06,
"loss": 0.3963,
"step": 7580
},
{
"epoch": 0.8,
"learning_rate": 3.95778364116095e-06,
"loss": 0.5378,
"step": 7600
},
{
"epoch": 0.8,
"learning_rate": 3.9155672823218996e-06,
"loss": 0.4826,
"step": 7620
},
{
"epoch": 0.81,
"learning_rate": 3.8733509234828494e-06,
"loss": 0.5258,
"step": 7640
},
{
"epoch": 0.81,
"learning_rate": 3.831134564643799e-06,
"loss": 0.7325,
"step": 7660
},
{
"epoch": 0.81,
"learning_rate": 3.7889182058047496e-06,
"loss": 0.6236,
"step": 7680
},
{
"epoch": 0.81,
"learning_rate": 3.7467018469656995e-06,
"loss": 0.5897,
"step": 7700
},
{
"epoch": 0.81,
"learning_rate": 3.7044854881266494e-06,
"loss": 0.4477,
"step": 7720
},
{
"epoch": 0.82,
"learning_rate": 3.6622691292875997e-06,
"loss": 0.5014,
"step": 7740
},
{
"epoch": 0.82,
"learning_rate": 3.6200527704485487e-06,
"loss": 0.6654,
"step": 7760
},
{
"epoch": 0.82,
"learning_rate": 3.577836411609499e-06,
"loss": 0.5581,
"step": 7780
},
{
"epoch": 0.82,
"learning_rate": 3.535620052770449e-06,
"loss": 0.5271,
"step": 7800
},
{
"epoch": 0.83,
"learning_rate": 3.4934036939313987e-06,
"loss": 0.6031,
"step": 7820
},
{
"epoch": 0.83,
"learning_rate": 3.4511873350923486e-06,
"loss": 0.3737,
"step": 7840
},
{
"epoch": 0.83,
"learning_rate": 3.4089709762532985e-06,
"loss": 0.5038,
"step": 7860
},
{
"epoch": 0.83,
"learning_rate": 3.3667546174142484e-06,
"loss": 0.5567,
"step": 7880
},
{
"epoch": 0.83,
"learning_rate": 3.3245382585751982e-06,
"loss": 0.6011,
"step": 7900
},
{
"epoch": 0.84,
"learning_rate": 3.2823218997361477e-06,
"loss": 0.5105,
"step": 7920
},
{
"epoch": 0.84,
"learning_rate": 3.2401055408970976e-06,
"loss": 0.3994,
"step": 7940
},
{
"epoch": 0.84,
"learning_rate": 3.197889182058048e-06,
"loss": 0.4226,
"step": 7960
},
{
"epoch": 0.84,
"learning_rate": 3.1556728232189977e-06,
"loss": 0.3077,
"step": 7980
},
{
"epoch": 0.84,
"learning_rate": 3.1134564643799476e-06,
"loss": 0.4729,
"step": 8000
},
{
"epoch": 0.85,
"learning_rate": 3.0712401055408975e-06,
"loss": 0.5839,
"step": 8020
},
{
"epoch": 0.85,
"learning_rate": 3.0290237467018474e-06,
"loss": 0.6405,
"step": 8040
},
{
"epoch": 0.85,
"learning_rate": 2.9868073878627972e-06,
"loss": 0.6151,
"step": 8060
},
{
"epoch": 0.85,
"learning_rate": 2.9445910290237467e-06,
"loss": 0.3509,
"step": 8080
},
{
"epoch": 0.85,
"learning_rate": 2.9023746701846966e-06,
"loss": 0.4864,
"step": 8100
},
{
"epoch": 0.86,
"learning_rate": 2.8601583113456464e-06,
"loss": 0.5188,
"step": 8120
},
{
"epoch": 0.86,
"learning_rate": 2.8179419525065967e-06,
"loss": 0.602,
"step": 8140
},
{
"epoch": 0.86,
"learning_rate": 2.7757255936675466e-06,
"loss": 0.4617,
"step": 8160
},
{
"epoch": 0.86,
"learning_rate": 2.7335092348284965e-06,
"loss": 0.2785,
"step": 8180
},
{
"epoch": 0.87,
"learning_rate": 2.6912928759894464e-06,
"loss": 0.3212,
"step": 8200
},
{
"epoch": 0.87,
"learning_rate": 2.6490765171503962e-06,
"loss": 0.5852,
"step": 8220
},
{
"epoch": 0.87,
"learning_rate": 2.6068601583113457e-06,
"loss": 0.5594,
"step": 8240
},
{
"epoch": 0.87,
"learning_rate": 2.5646437994722956e-06,
"loss": 0.3948,
"step": 8260
},
{
"epoch": 0.87,
"learning_rate": 2.5224274406332454e-06,
"loss": 0.5771,
"step": 8280
},
{
"epoch": 0.88,
"learning_rate": 2.4802110817941953e-06,
"loss": 0.3874,
"step": 8300
},
{
"epoch": 0.88,
"learning_rate": 2.4379947229551456e-06,
"loss": 0.6091,
"step": 8320
},
{
"epoch": 0.88,
"learning_rate": 2.395778364116095e-06,
"loss": 0.471,
"step": 8340
},
{
"epoch": 0.88,
"learning_rate": 2.353562005277045e-06,
"loss": 0.4263,
"step": 8360
},
{
"epoch": 0.88,
"learning_rate": 2.311345646437995e-06,
"loss": 0.4406,
"step": 8380
},
{
"epoch": 0.89,
"learning_rate": 2.2691292875989447e-06,
"loss": 0.3412,
"step": 8400
},
{
"epoch": 0.89,
"learning_rate": 2.2269129287598945e-06,
"loss": 0.4294,
"step": 8420
},
{
"epoch": 0.89,
"learning_rate": 2.1846965699208444e-06,
"loss": 0.6936,
"step": 8440
},
{
"epoch": 0.89,
"learning_rate": 2.1424802110817943e-06,
"loss": 0.5937,
"step": 8460
},
{
"epoch": 0.89,
"learning_rate": 2.100263852242744e-06,
"loss": 0.5567,
"step": 8480
},
{
"epoch": 0.9,
"learning_rate": 2.058047493403694e-06,
"loss": 0.4481,
"step": 8500
},
{
"epoch": 0.9,
"learning_rate": 2.015831134564644e-06,
"loss": 0.42,
"step": 8520
},
{
"epoch": 0.9,
"learning_rate": 1.973614775725594e-06,
"loss": 0.5064,
"step": 8540
},
{
"epoch": 0.9,
"learning_rate": 1.9313984168865437e-06,
"loss": 0.7567,
"step": 8560
},
{
"epoch": 0.91,
"learning_rate": 1.8891820580474935e-06,
"loss": 0.411,
"step": 8580
},
{
"epoch": 0.91,
"learning_rate": 1.8469656992084434e-06,
"loss": 0.4306,
"step": 8600
},
{
"epoch": 0.91,
"learning_rate": 1.8047493403693933e-06,
"loss": 0.6867,
"step": 8620
},
{
"epoch": 0.91,
"learning_rate": 1.7625329815303432e-06,
"loss": 0.6504,
"step": 8640
},
{
"epoch": 0.91,
"learning_rate": 1.7203166226912928e-06,
"loss": 0.4825,
"step": 8660
},
{
"epoch": 0.92,
"learning_rate": 1.678100263852243e-06,
"loss": 0.6795,
"step": 8680
},
{
"epoch": 0.92,
"learning_rate": 1.6358839050131928e-06,
"loss": 0.395,
"step": 8700
},
{
"epoch": 0.92,
"learning_rate": 1.5936675461741427e-06,
"loss": 0.4376,
"step": 8720
},
{
"epoch": 0.92,
"learning_rate": 1.5514511873350923e-06,
"loss": 0.5183,
"step": 8740
},
{
"epoch": 0.92,
"learning_rate": 1.5092348284960422e-06,
"loss": 0.5289,
"step": 8760
},
{
"epoch": 0.93,
"learning_rate": 1.4670184696569923e-06,
"loss": 0.6665,
"step": 8780
},
{
"epoch": 0.93,
"learning_rate": 1.4248021108179422e-06,
"loss": 0.5317,
"step": 8800
},
{
"epoch": 0.93,
"learning_rate": 1.3825857519788918e-06,
"loss": 0.2245,
"step": 8820
},
{
"epoch": 0.93,
"learning_rate": 1.3403693931398417e-06,
"loss": 0.44,
"step": 8840
},
{
"epoch": 0.94,
"learning_rate": 1.2981530343007918e-06,
"loss": 0.4556,
"step": 8860
},
{
"epoch": 0.94,
"learning_rate": 1.2559366754617417e-06,
"loss": 0.5804,
"step": 8880
},
{
"epoch": 0.94,
"learning_rate": 1.2137203166226915e-06,
"loss": 0.4209,
"step": 8900
},
{
"epoch": 0.94,
"learning_rate": 1.1715039577836412e-06,
"loss": 0.3838,
"step": 8920
},
{
"epoch": 0.94,
"learning_rate": 1.129287598944591e-06,
"loss": 0.4607,
"step": 8940
},
{
"epoch": 0.95,
"learning_rate": 1.087071240105541e-06,
"loss": 0.5023,
"step": 8960
},
{
"epoch": 0.95,
"learning_rate": 1.0448548812664908e-06,
"loss": 0.3096,
"step": 8980
},
{
"epoch": 0.95,
"learning_rate": 1.0026385224274407e-06,
"loss": 0.6846,
"step": 9000
},
{
"epoch": 0.95,
"learning_rate": 9.604221635883906e-07,
"loss": 0.3319,
"step": 9020
},
{
"epoch": 0.95,
"learning_rate": 9.182058047493404e-07,
"loss": 0.5508,
"step": 9040
},
{
"epoch": 0.96,
"learning_rate": 8.759894459102903e-07,
"loss": 0.6869,
"step": 9060
},
{
"epoch": 0.96,
"learning_rate": 8.337730870712402e-07,
"loss": 0.4138,
"step": 9080
},
{
"epoch": 0.96,
"learning_rate": 7.915567282321901e-07,
"loss": 0.6574,
"step": 9100
},
{
"epoch": 0.96,
"learning_rate": 7.493403693931399e-07,
"loss": 0.716,
"step": 9120
},
{
"epoch": 0.96,
"learning_rate": 7.071240105540898e-07,
"loss": 0.4334,
"step": 9140
},
{
"epoch": 0.97,
"learning_rate": 6.649076517150396e-07,
"loss": 0.481,
"step": 9160
},
{
"epoch": 0.97,
"learning_rate": 6.226912928759895e-07,
"loss": 0.6728,
"step": 9180
},
{
"epoch": 0.97,
"learning_rate": 5.804749340369393e-07,
"loss": 0.6311,
"step": 9200
},
{
"epoch": 0.97,
"learning_rate": 5.382585751978892e-07,
"loss": 0.4732,
"step": 9220
},
{
"epoch": 0.98,
"learning_rate": 4.960422163588391e-07,
"loss": 0.4456,
"step": 9240
},
{
"epoch": 0.98,
"learning_rate": 4.5382585751978896e-07,
"loss": 0.7213,
"step": 9260
},
{
"epoch": 0.98,
"learning_rate": 4.1160949868073883e-07,
"loss": 0.6193,
"step": 9280
},
{
"epoch": 0.98,
"learning_rate": 3.693931398416887e-07,
"loss": 0.4499,
"step": 9300
},
{
"epoch": 0.98,
"learning_rate": 3.271767810026386e-07,
"loss": 0.5332,
"step": 9320
},
{
"epoch": 0.99,
"learning_rate": 2.849604221635884e-07,
"loss": 0.5398,
"step": 9340
},
{
"epoch": 0.99,
"learning_rate": 2.427440633245383e-07,
"loss": 0.5923,
"step": 9360
},
{
"epoch": 0.99,
"learning_rate": 2.0052770448548815e-07,
"loss": 0.4648,
"step": 9380
},
{
"epoch": 0.99,
"learning_rate": 1.5831134564643802e-07,
"loss": 0.396,
"step": 9400
},
{
"epoch": 0.99,
"learning_rate": 1.1609498680738787e-07,
"loss": 0.4959,
"step": 9420
},
{
"epoch": 1.0,
"learning_rate": 7.387862796833773e-08,
"loss": 0.5742,
"step": 9440
},
{
"epoch": 1.0,
"learning_rate": 3.16622691292876e-08,
"loss": 0.7434,
"step": 9460
},
{
"epoch": 1.0,
"step": 9475,
"total_flos": 18716049801216.0,
"train_loss": 0.5445125763019975,
"train_runtime": 21957.5944,
"train_samples_per_second": 0.432,
"train_steps_per_second": 0.432
}
],
"max_steps": 9475,
"num_train_epochs": 1,
"total_flos": 18716049801216.0,
"trial_name": null,
"trial_params": null
}