roberta-temporal-predictor / trainer_state.json
zjiayao's picture
Upload trainer_state.json
a9dcc5a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9911172880536587,
"global_step": 148500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.983214830033369e-05,
"loss": 2.0208,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.9664296600667384e-05,
"loss": 1.9132,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 4.949644490100107e-05,
"loss": 1.9868,
"step": 1500
},
{
"epoch": 0.04,
"learning_rate": 4.932859320133476e-05,
"loss": 1.9393,
"step": 2000
},
{
"epoch": 0.05,
"learning_rate": 4.9160741501668454e-05,
"loss": 1.9249,
"step": 2500
},
{
"epoch": 0.06,
"learning_rate": 4.899288980200214e-05,
"loss": 1.9124,
"step": 3000
},
{
"epoch": 0.07,
"learning_rate": 4.882503810233582e-05,
"loss": 1.9182,
"step": 3500
},
{
"epoch": 0.08,
"learning_rate": 4.8657186402669516e-05,
"loss": 1.8824,
"step": 4000
},
{
"epoch": 0.09,
"learning_rate": 4.8489334703003204e-05,
"loss": 1.8849,
"step": 4500
},
{
"epoch": 0.1,
"learning_rate": 4.832148300333689e-05,
"loss": 1.912,
"step": 5000
},
{
"epoch": 0.11,
"learning_rate": 4.815363130367058e-05,
"loss": 1.9117,
"step": 5500
},
{
"epoch": 0.12,
"learning_rate": 4.798577960400427e-05,
"loss": 1.9266,
"step": 6000
},
{
"epoch": 0.13,
"learning_rate": 4.781792790433796e-05,
"loss": 1.8955,
"step": 6500
},
{
"epoch": 0.14,
"learning_rate": 4.765007620467165e-05,
"loss": 1.8364,
"step": 7000
},
{
"epoch": 0.15,
"learning_rate": 4.748222450500534e-05,
"loss": 1.9024,
"step": 7500
},
{
"epoch": 0.16,
"learning_rate": 4.731437280533903e-05,
"loss": 1.9295,
"step": 8000
},
{
"epoch": 0.17,
"learning_rate": 4.714652110567272e-05,
"loss": 1.8972,
"step": 8500
},
{
"epoch": 0.18,
"learning_rate": 4.697866940600641e-05,
"loss": 1.8787,
"step": 9000
},
{
"epoch": 0.19,
"learning_rate": 4.68108177063401e-05,
"loss": 1.8565,
"step": 9500
},
{
"epoch": 0.2,
"learning_rate": 4.664296600667379e-05,
"loss": 1.8623,
"step": 10000
},
{
"epoch": 0.21,
"learning_rate": 4.6475114307007474e-05,
"loss": 1.9118,
"step": 10500
},
{
"epoch": 0.22,
"learning_rate": 4.630726260734116e-05,
"loss": 1.8846,
"step": 11000
},
{
"epoch": 0.23,
"learning_rate": 4.613941090767485e-05,
"loss": 1.806,
"step": 11500
},
{
"epoch": 0.24,
"learning_rate": 4.5971559208008544e-05,
"loss": 1.8755,
"step": 12000
},
{
"epoch": 0.25,
"learning_rate": 4.580370750834223e-05,
"loss": 1.8432,
"step": 12500
},
{
"epoch": 0.26,
"learning_rate": 4.563585580867592e-05,
"loss": 1.8495,
"step": 13000
},
{
"epoch": 0.27,
"learning_rate": 4.5468004109009606e-05,
"loss": 1.8765,
"step": 13500
},
{
"epoch": 0.28,
"learning_rate": 4.53001524093433e-05,
"loss": 1.881,
"step": 14000
},
{
"epoch": 0.29,
"learning_rate": 4.513230070967699e-05,
"loss": 1.8373,
"step": 14500
},
{
"epoch": 0.3,
"learning_rate": 4.4964449010010676e-05,
"loss": 1.8529,
"step": 15000
},
{
"epoch": 0.31,
"learning_rate": 4.479659731034437e-05,
"loss": 1.8757,
"step": 15500
},
{
"epoch": 0.32,
"learning_rate": 4.462874561067806e-05,
"loss": 1.8826,
"step": 16000
},
{
"epoch": 0.33,
"learning_rate": 4.4460893911011745e-05,
"loss": 1.824,
"step": 16500
},
{
"epoch": 0.34,
"learning_rate": 4.429304221134544e-05,
"loss": 1.8273,
"step": 17000
},
{
"epoch": 0.35,
"learning_rate": 4.412519051167912e-05,
"loss": 1.8063,
"step": 17500
},
{
"epoch": 0.36,
"learning_rate": 4.395733881201281e-05,
"loss": 1.8627,
"step": 18000
},
{
"epoch": 0.37,
"learning_rate": 4.37894871123465e-05,
"loss": 1.8668,
"step": 18500
},
{
"epoch": 0.38,
"learning_rate": 4.362163541268019e-05,
"loss": 1.8519,
"step": 19000
},
{
"epoch": 0.39,
"learning_rate": 4.345378371301388e-05,
"loss": 1.8254,
"step": 19500
},
{
"epoch": 0.4,
"learning_rate": 4.328593201334757e-05,
"loss": 1.8253,
"step": 20000
},
{
"epoch": 0.41,
"learning_rate": 4.311808031368126e-05,
"loss": 1.8707,
"step": 20500
},
{
"epoch": 0.42,
"learning_rate": 4.2950228614014947e-05,
"loss": 1.865,
"step": 21000
},
{
"epoch": 0.43,
"learning_rate": 4.2782376914348634e-05,
"loss": 1.8111,
"step": 21500
},
{
"epoch": 0.44,
"learning_rate": 4.261452521468233e-05,
"loss": 1.8106,
"step": 22000
},
{
"epoch": 0.45,
"learning_rate": 4.2446673515016016e-05,
"loss": 1.8437,
"step": 22500
},
{
"epoch": 0.46,
"learning_rate": 4.2278821815349704e-05,
"loss": 1.7636,
"step": 23000
},
{
"epoch": 0.47,
"learning_rate": 4.21109701156834e-05,
"loss": 1.8333,
"step": 23500
},
{
"epoch": 0.48,
"learning_rate": 4.1943118416017085e-05,
"loss": 1.7956,
"step": 24000
},
{
"epoch": 0.49,
"learning_rate": 4.1775266716350766e-05,
"loss": 1.7986,
"step": 24500
},
{
"epoch": 0.5,
"learning_rate": 4.160741501668446e-05,
"loss": 1.8407,
"step": 25000
},
{
"epoch": 0.51,
"learning_rate": 4.143956331701815e-05,
"loss": 1.7847,
"step": 25500
},
{
"epoch": 0.52,
"learning_rate": 4.1271711617351835e-05,
"loss": 1.8141,
"step": 26000
},
{
"epoch": 0.53,
"learning_rate": 4.110385991768553e-05,
"loss": 1.8165,
"step": 26500
},
{
"epoch": 0.54,
"learning_rate": 4.093600821801922e-05,
"loss": 1.7665,
"step": 27000
},
{
"epoch": 0.55,
"learning_rate": 4.0768156518352905e-05,
"loss": 1.7716,
"step": 27500
},
{
"epoch": 0.56,
"learning_rate": 4.06003048186866e-05,
"loss": 1.8061,
"step": 28000
},
{
"epoch": 0.57,
"learning_rate": 4.043245311902029e-05,
"loss": 1.7838,
"step": 28500
},
{
"epoch": 0.58,
"learning_rate": 4.0264601419353974e-05,
"loss": 1.8049,
"step": 29000
},
{
"epoch": 0.59,
"learning_rate": 4.009674971968766e-05,
"loss": 1.8444,
"step": 29500
},
{
"epoch": 0.6,
"learning_rate": 3.9928898020021356e-05,
"loss": 1.7829,
"step": 30000
},
{
"epoch": 0.61,
"learning_rate": 3.9761046320355044e-05,
"loss": 1.7886,
"step": 30500
},
{
"epoch": 0.62,
"learning_rate": 3.959319462068873e-05,
"loss": 1.7421,
"step": 31000
},
{
"epoch": 0.63,
"learning_rate": 3.942534292102242e-05,
"loss": 1.7796,
"step": 31500
},
{
"epoch": 0.64,
"learning_rate": 3.9257491221356106e-05,
"loss": 1.7857,
"step": 32000
},
{
"epoch": 0.65,
"learning_rate": 3.9089639521689794e-05,
"loss": 1.7642,
"step": 32500
},
{
"epoch": 0.66,
"learning_rate": 3.892178782202349e-05,
"loss": 1.7624,
"step": 33000
},
{
"epoch": 0.67,
"learning_rate": 3.8753936122357176e-05,
"loss": 1.7658,
"step": 33500
},
{
"epoch": 0.68,
"learning_rate": 3.858608442269086e-05,
"loss": 1.7744,
"step": 34000
},
{
"epoch": 0.69,
"learning_rate": 3.841823272302456e-05,
"loss": 1.7797,
"step": 34500
},
{
"epoch": 0.7,
"learning_rate": 3.8250381023358245e-05,
"loss": 1.7507,
"step": 35000
},
{
"epoch": 0.72,
"learning_rate": 3.808252932369193e-05,
"loss": 1.7936,
"step": 35500
},
{
"epoch": 0.73,
"learning_rate": 3.791467762402563e-05,
"loss": 1.8028,
"step": 36000
},
{
"epoch": 0.74,
"learning_rate": 3.7746825924359314e-05,
"loss": 1.7356,
"step": 36500
},
{
"epoch": 0.75,
"learning_rate": 3.7578974224693e-05,
"loss": 1.7545,
"step": 37000
},
{
"epoch": 0.76,
"learning_rate": 3.741112252502669e-05,
"loss": 1.7676,
"step": 37500
},
{
"epoch": 0.77,
"learning_rate": 3.724327082536038e-05,
"loss": 1.7377,
"step": 38000
},
{
"epoch": 0.78,
"learning_rate": 3.7075419125694065e-05,
"loss": 1.796,
"step": 38500
},
{
"epoch": 0.79,
"learning_rate": 3.690756742602776e-05,
"loss": 1.7323,
"step": 39000
},
{
"epoch": 0.8,
"learning_rate": 3.6739715726361446e-05,
"loss": 1.7567,
"step": 39500
},
{
"epoch": 0.81,
"learning_rate": 3.6571864026695134e-05,
"loss": 1.7604,
"step": 40000
},
{
"epoch": 0.82,
"learning_rate": 3.640401232702882e-05,
"loss": 1.769,
"step": 40500
},
{
"epoch": 0.83,
"learning_rate": 3.6236160627362516e-05,
"loss": 1.7287,
"step": 41000
},
{
"epoch": 0.84,
"learning_rate": 3.60683089276962e-05,
"loss": 1.6988,
"step": 41500
},
{
"epoch": 0.85,
"learning_rate": 3.590045722802989e-05,
"loss": 1.6647,
"step": 42000
},
{
"epoch": 0.86,
"learning_rate": 3.5732605528363585e-05,
"loss": 1.7291,
"step": 42500
},
{
"epoch": 0.87,
"learning_rate": 3.556475382869727e-05,
"loss": 1.7041,
"step": 43000
},
{
"epoch": 0.88,
"learning_rate": 3.539690212903096e-05,
"loss": 1.7678,
"step": 43500
},
{
"epoch": 0.89,
"learning_rate": 3.5229050429364654e-05,
"loss": 1.7404,
"step": 44000
},
{
"epoch": 0.9,
"learning_rate": 3.506119872969834e-05,
"loss": 1.7057,
"step": 44500
},
{
"epoch": 0.91,
"learning_rate": 3.489334703003202e-05,
"loss": 1.7606,
"step": 45000
},
{
"epoch": 0.92,
"learning_rate": 3.472549533036572e-05,
"loss": 1.7058,
"step": 45500
},
{
"epoch": 0.93,
"learning_rate": 3.4557643630699405e-05,
"loss": 1.7202,
"step": 46000
},
{
"epoch": 0.94,
"learning_rate": 3.438979193103309e-05,
"loss": 1.7365,
"step": 46500
},
{
"epoch": 0.95,
"learning_rate": 3.4221940231366786e-05,
"loss": 1.7607,
"step": 47000
},
{
"epoch": 0.96,
"learning_rate": 3.4054088531700474e-05,
"loss": 1.6973,
"step": 47500
},
{
"epoch": 0.97,
"learning_rate": 3.388623683203416e-05,
"loss": 1.7425,
"step": 48000
},
{
"epoch": 0.98,
"learning_rate": 3.371838513236785e-05,
"loss": 1.7351,
"step": 48500
},
{
"epoch": 0.99,
"learning_rate": 3.355053343270154e-05,
"loss": 1.7134,
"step": 49000
},
{
"epoch": 1.0,
"learning_rate": 3.338268173303523e-05,
"loss": 1.7319,
"step": 49500
},
{
"epoch": 1.01,
"learning_rate": 3.321483003336892e-05,
"loss": 1.674,
"step": 50000
},
{
"epoch": 1.02,
"learning_rate": 3.304697833370261e-05,
"loss": 1.6833,
"step": 50500
},
{
"epoch": 1.03,
"learning_rate": 3.28791266340363e-05,
"loss": 1.7056,
"step": 51000
},
{
"epoch": 1.04,
"learning_rate": 3.271127493436998e-05,
"loss": 1.6541,
"step": 51500
},
{
"epoch": 1.05,
"learning_rate": 3.2543423234703675e-05,
"loss": 1.6648,
"step": 52000
},
{
"epoch": 1.06,
"learning_rate": 3.237557153503736e-05,
"loss": 1.6497,
"step": 52500
},
{
"epoch": 1.07,
"learning_rate": 3.220771983537105e-05,
"loss": 1.623,
"step": 53000
},
{
"epoch": 1.08,
"learning_rate": 3.2039868135704745e-05,
"loss": 1.6813,
"step": 53500
},
{
"epoch": 1.09,
"learning_rate": 3.187201643603843e-05,
"loss": 1.6686,
"step": 54000
},
{
"epoch": 1.1,
"learning_rate": 3.170416473637212e-05,
"loss": 1.6269,
"step": 54500
},
{
"epoch": 1.11,
"learning_rate": 3.1536313036705814e-05,
"loss": 1.656,
"step": 55000
},
{
"epoch": 1.12,
"learning_rate": 3.13684613370395e-05,
"loss": 1.7025,
"step": 55500
},
{
"epoch": 1.13,
"learning_rate": 3.120060963737319e-05,
"loss": 1.6969,
"step": 56000
},
{
"epoch": 1.14,
"learning_rate": 3.103275793770688e-05,
"loss": 1.6252,
"step": 56500
},
{
"epoch": 1.15,
"learning_rate": 3.086490623804057e-05,
"loss": 1.6621,
"step": 57000
},
{
"epoch": 1.16,
"learning_rate": 3.069705453837426e-05,
"loss": 1.6941,
"step": 57500
},
{
"epoch": 1.17,
"learning_rate": 3.0529202838707946e-05,
"loss": 1.6198,
"step": 58000
},
{
"epoch": 1.18,
"learning_rate": 3.0361351139041634e-05,
"loss": 1.6934,
"step": 58500
},
{
"epoch": 1.19,
"learning_rate": 3.019349943937532e-05,
"loss": 1.6778,
"step": 59000
},
{
"epoch": 1.2,
"learning_rate": 3.0025647739709012e-05,
"loss": 1.5945,
"step": 59500
},
{
"epoch": 1.21,
"learning_rate": 2.9857796040042703e-05,
"loss": 1.6752,
"step": 60000
},
{
"epoch": 1.22,
"learning_rate": 2.968994434037639e-05,
"loss": 1.6683,
"step": 60500
},
{
"epoch": 1.23,
"learning_rate": 2.952209264071008e-05,
"loss": 1.6261,
"step": 61000
},
{
"epoch": 1.24,
"learning_rate": 2.935424094104377e-05,
"loss": 1.5955,
"step": 61500
},
{
"epoch": 1.25,
"learning_rate": 2.918638924137746e-05,
"loss": 1.6483,
"step": 62000
},
{
"epoch": 1.26,
"learning_rate": 2.901853754171115e-05,
"loss": 1.5902,
"step": 62500
},
{
"epoch": 1.27,
"learning_rate": 2.885068584204484e-05,
"loss": 1.6174,
"step": 63000
},
{
"epoch": 1.28,
"learning_rate": 2.868283414237853e-05,
"loss": 1.6423,
"step": 63500
},
{
"epoch": 1.29,
"learning_rate": 2.8514982442712217e-05,
"loss": 1.6085,
"step": 64000
},
{
"epoch": 1.3,
"learning_rate": 2.8347130743045908e-05,
"loss": 1.6304,
"step": 64500
},
{
"epoch": 1.31,
"learning_rate": 2.81792790433796e-05,
"loss": 1.6473,
"step": 65000
},
{
"epoch": 1.32,
"learning_rate": 2.8011427343713283e-05,
"loss": 1.6614,
"step": 65500
},
{
"epoch": 1.33,
"learning_rate": 2.784357564404697e-05,
"loss": 1.678,
"step": 66000
},
{
"epoch": 1.34,
"learning_rate": 2.767572394438066e-05,
"loss": 1.5877,
"step": 66500
},
{
"epoch": 1.35,
"learning_rate": 2.750787224471435e-05,
"loss": 1.6678,
"step": 67000
},
{
"epoch": 1.36,
"learning_rate": 2.734002054504804e-05,
"loss": 1.5927,
"step": 67500
},
{
"epoch": 1.37,
"learning_rate": 2.717216884538173e-05,
"loss": 1.637,
"step": 68000
},
{
"epoch": 1.38,
"learning_rate": 2.7004317145715418e-05,
"loss": 1.6597,
"step": 68500
},
{
"epoch": 1.39,
"learning_rate": 2.683646544604911e-05,
"loss": 1.6144,
"step": 69000
},
{
"epoch": 1.4,
"learning_rate": 2.6668613746382797e-05,
"loss": 1.6186,
"step": 69500
},
{
"epoch": 1.41,
"learning_rate": 2.6500762046716488e-05,
"loss": 1.6057,
"step": 70000
},
{
"epoch": 1.42,
"learning_rate": 2.633291034705018e-05,
"loss": 1.6342,
"step": 70500
},
{
"epoch": 1.43,
"learning_rate": 2.6165058647383866e-05,
"loss": 1.6287,
"step": 71000
},
{
"epoch": 1.44,
"learning_rate": 2.5997206947717557e-05,
"loss": 1.5951,
"step": 71500
},
{
"epoch": 1.45,
"learning_rate": 2.5829355248051244e-05,
"loss": 1.5532,
"step": 72000
},
{
"epoch": 1.46,
"learning_rate": 2.566150354838493e-05,
"loss": 1.6008,
"step": 72500
},
{
"epoch": 1.47,
"learning_rate": 2.549365184871862e-05,
"loss": 1.6344,
"step": 73000
},
{
"epoch": 1.48,
"learning_rate": 2.532580014905231e-05,
"loss": 1.6064,
"step": 73500
},
{
"epoch": 1.49,
"learning_rate": 2.5157948449385998e-05,
"loss": 1.5704,
"step": 74000
},
{
"epoch": 1.5,
"learning_rate": 2.499009674971969e-05,
"loss": 1.651,
"step": 74500
},
{
"epoch": 1.51,
"learning_rate": 2.4822245050053376e-05,
"loss": 1.5544,
"step": 75000
},
{
"epoch": 1.52,
"learning_rate": 2.4654393350387067e-05,
"loss": 1.6534,
"step": 75500
},
{
"epoch": 1.53,
"learning_rate": 2.448654165072076e-05,
"loss": 1.5917,
"step": 76000
},
{
"epoch": 1.54,
"learning_rate": 2.4318689951054446e-05,
"loss": 1.641,
"step": 76500
},
{
"epoch": 1.55,
"learning_rate": 2.4150838251388137e-05,
"loss": 1.5987,
"step": 77000
},
{
"epoch": 1.56,
"learning_rate": 2.3982986551721824e-05,
"loss": 1.5428,
"step": 77500
},
{
"epoch": 1.57,
"learning_rate": 2.3815134852055512e-05,
"loss": 1.5564,
"step": 78000
},
{
"epoch": 1.58,
"learning_rate": 2.3647283152389203e-05,
"loss": 1.5471,
"step": 78500
},
{
"epoch": 1.59,
"learning_rate": 2.347943145272289e-05,
"loss": 1.5965,
"step": 79000
},
{
"epoch": 1.6,
"learning_rate": 2.331157975305658e-05,
"loss": 1.5883,
"step": 79500
},
{
"epoch": 1.61,
"learning_rate": 2.3143728053390272e-05,
"loss": 1.6109,
"step": 80000
},
{
"epoch": 1.62,
"learning_rate": 2.297587635372396e-05,
"loss": 1.5431,
"step": 80500
},
{
"epoch": 1.63,
"learning_rate": 2.2808024654057647e-05,
"loss": 1.565,
"step": 81000
},
{
"epoch": 1.64,
"learning_rate": 2.2640172954391338e-05,
"loss": 1.5316,
"step": 81500
},
{
"epoch": 1.65,
"learning_rate": 2.2472321254725026e-05,
"loss": 1.5565,
"step": 82000
},
{
"epoch": 1.66,
"learning_rate": 2.2304469555058717e-05,
"loss": 1.5611,
"step": 82500
},
{
"epoch": 1.67,
"learning_rate": 2.2136617855392404e-05,
"loss": 1.5737,
"step": 83000
},
{
"epoch": 1.68,
"learning_rate": 2.1968766155726095e-05,
"loss": 1.5443,
"step": 83500
},
{
"epoch": 1.69,
"learning_rate": 2.1800914456059786e-05,
"loss": 1.5886,
"step": 84000
},
{
"epoch": 1.7,
"learning_rate": 2.163306275639347e-05,
"loss": 1.5162,
"step": 84500
},
{
"epoch": 1.71,
"learning_rate": 2.146521105672716e-05,
"loss": 1.5889,
"step": 85000
},
{
"epoch": 1.72,
"learning_rate": 2.1297359357060852e-05,
"loss": 1.5115,
"step": 85500
},
{
"epoch": 1.73,
"learning_rate": 2.112950765739454e-05,
"loss": 1.554,
"step": 86000
},
{
"epoch": 1.74,
"learning_rate": 2.096165595772823e-05,
"loss": 1.5168,
"step": 86500
},
{
"epoch": 1.75,
"learning_rate": 2.0793804258061918e-05,
"loss": 1.5798,
"step": 87000
},
{
"epoch": 1.76,
"learning_rate": 2.0625952558395605e-05,
"loss": 1.5888,
"step": 87500
},
{
"epoch": 1.77,
"learning_rate": 2.0458100858729296e-05,
"loss": 1.5502,
"step": 88000
},
{
"epoch": 1.78,
"learning_rate": 2.0290249159062984e-05,
"loss": 1.5765,
"step": 88500
},
{
"epoch": 1.79,
"learning_rate": 2.0122397459396675e-05,
"loss": 1.6176,
"step": 89000
},
{
"epoch": 1.8,
"learning_rate": 1.9954545759730366e-05,
"loss": 1.584,
"step": 89500
},
{
"epoch": 1.81,
"learning_rate": 1.9786694060064053e-05,
"loss": 1.5765,
"step": 90000
},
{
"epoch": 1.82,
"learning_rate": 1.9618842360397744e-05,
"loss": 1.571,
"step": 90500
},
{
"epoch": 1.83,
"learning_rate": 1.9450990660731432e-05,
"loss": 1.5622,
"step": 91000
},
{
"epoch": 1.84,
"learning_rate": 1.928313896106512e-05,
"loss": 1.5403,
"step": 91500
},
{
"epoch": 1.85,
"learning_rate": 1.911528726139881e-05,
"loss": 1.5757,
"step": 92000
},
{
"epoch": 1.86,
"learning_rate": 1.8947435561732498e-05,
"loss": 1.5464,
"step": 92500
},
{
"epoch": 1.87,
"learning_rate": 1.877958386206619e-05,
"loss": 1.5551,
"step": 93000
},
{
"epoch": 1.88,
"learning_rate": 1.861173216239988e-05,
"loss": 1.6047,
"step": 93500
},
{
"epoch": 1.89,
"learning_rate": 1.8443880462733567e-05,
"loss": 1.5463,
"step": 94000
},
{
"epoch": 1.9,
"learning_rate": 1.8276028763067255e-05,
"loss": 1.5441,
"step": 94500
},
{
"epoch": 1.91,
"learning_rate": 1.8108177063400946e-05,
"loss": 1.4773,
"step": 95000
},
{
"epoch": 1.92,
"learning_rate": 1.7940325363734633e-05,
"loss": 1.5374,
"step": 95500
},
{
"epoch": 1.93,
"learning_rate": 1.7772473664068324e-05,
"loss": 1.5019,
"step": 96000
},
{
"epoch": 1.94,
"learning_rate": 1.760462196440201e-05,
"loss": 1.5133,
"step": 96500
},
{
"epoch": 1.95,
"learning_rate": 1.7436770264735703e-05,
"loss": 1.5296,
"step": 97000
},
{
"epoch": 1.96,
"learning_rate": 1.7268918565069393e-05,
"loss": 1.5244,
"step": 97500
},
{
"epoch": 1.97,
"learning_rate": 1.7101066865403078e-05,
"loss": 1.5128,
"step": 98000
},
{
"epoch": 1.98,
"learning_rate": 1.693321516573677e-05,
"loss": 1.5492,
"step": 98500
},
{
"epoch": 1.99,
"learning_rate": 1.676536346607046e-05,
"loss": 1.5296,
"step": 99000
},
{
"epoch": 2.0,
"learning_rate": 1.6597511766404147e-05,
"loss": 1.5301,
"step": 99500
},
{
"epoch": 2.01,
"learning_rate": 1.6429660066737838e-05,
"loss": 1.437,
"step": 100000
},
{
"epoch": 2.02,
"learning_rate": 1.6261808367071525e-05,
"loss": 1.5212,
"step": 100500
},
{
"epoch": 2.03,
"learning_rate": 1.6093956667405216e-05,
"loss": 1.5229,
"step": 101000
},
{
"epoch": 2.04,
"learning_rate": 1.5926104967738904e-05,
"loss": 1.5179,
"step": 101500
},
{
"epoch": 2.05,
"learning_rate": 1.575825326807259e-05,
"loss": 1.4308,
"step": 102000
},
{
"epoch": 2.06,
"learning_rate": 1.5590401568406282e-05,
"loss": 1.4449,
"step": 102500
},
{
"epoch": 2.07,
"learning_rate": 1.5422549868739973e-05,
"loss": 1.4243,
"step": 103000
},
{
"epoch": 2.08,
"learning_rate": 1.525469816907366e-05,
"loss": 1.4568,
"step": 103500
},
{
"epoch": 2.09,
"learning_rate": 1.508684646940735e-05,
"loss": 1.4311,
"step": 104000
},
{
"epoch": 2.1,
"learning_rate": 1.4918994769741041e-05,
"loss": 1.4512,
"step": 104500
},
{
"epoch": 2.11,
"learning_rate": 1.4751143070074727e-05,
"loss": 1.4654,
"step": 105000
},
{
"epoch": 2.13,
"learning_rate": 1.4583291370408416e-05,
"loss": 1.5063,
"step": 105500
},
{
"epoch": 2.14,
"learning_rate": 1.4415439670742107e-05,
"loss": 1.4677,
"step": 106000
},
{
"epoch": 2.15,
"learning_rate": 1.4247587971075796e-05,
"loss": 1.42,
"step": 106500
},
{
"epoch": 2.16,
"learning_rate": 1.4079736271409485e-05,
"loss": 1.4649,
"step": 107000
},
{
"epoch": 2.17,
"learning_rate": 1.3911884571743175e-05,
"loss": 1.457,
"step": 107500
},
{
"epoch": 2.18,
"learning_rate": 1.3744032872076864e-05,
"loss": 1.4483,
"step": 108000
},
{
"epoch": 2.19,
"learning_rate": 1.3576181172410551e-05,
"loss": 1.4556,
"step": 108500
},
{
"epoch": 2.2,
"learning_rate": 1.340832947274424e-05,
"loss": 1.4821,
"step": 109000
},
{
"epoch": 2.21,
"learning_rate": 1.324047777307793e-05,
"loss": 1.4516,
"step": 109500
},
{
"epoch": 2.22,
"learning_rate": 1.307262607341162e-05,
"loss": 1.4931,
"step": 110000
},
{
"epoch": 2.23,
"learning_rate": 1.290477437374531e-05,
"loss": 1.486,
"step": 110500
},
{
"epoch": 2.24,
"learning_rate": 1.2736922674079e-05,
"loss": 1.4215,
"step": 111000
},
{
"epoch": 2.25,
"learning_rate": 1.2569070974412688e-05,
"loss": 1.4592,
"step": 111500
},
{
"epoch": 2.26,
"learning_rate": 1.2401219274746378e-05,
"loss": 1.4665,
"step": 112000
},
{
"epoch": 2.27,
"learning_rate": 1.2233367575080065e-05,
"loss": 1.4869,
"step": 112500
},
{
"epoch": 2.28,
"learning_rate": 1.2065515875413754e-05,
"loss": 1.4399,
"step": 113000
},
{
"epoch": 2.29,
"learning_rate": 1.1897664175747444e-05,
"loss": 1.4149,
"step": 113500
},
{
"epoch": 2.3,
"learning_rate": 1.1729812476081135e-05,
"loss": 1.4397,
"step": 114000
},
{
"epoch": 2.31,
"learning_rate": 1.1561960776414822e-05,
"loss": 1.4257,
"step": 114500
},
{
"epoch": 2.32,
"learning_rate": 1.1394109076748511e-05,
"loss": 1.4216,
"step": 115000
},
{
"epoch": 2.33,
"learning_rate": 1.12262573770822e-05,
"loss": 1.4762,
"step": 115500
},
{
"epoch": 2.34,
"learning_rate": 1.105840567741589e-05,
"loss": 1.4572,
"step": 116000
},
{
"epoch": 2.35,
"learning_rate": 1.0890553977749579e-05,
"loss": 1.4371,
"step": 116500
},
{
"epoch": 2.36,
"learning_rate": 1.0722702278083268e-05,
"loss": 1.373,
"step": 117000
},
{
"epoch": 2.37,
"learning_rate": 1.0554850578416957e-05,
"loss": 1.4095,
"step": 117500
},
{
"epoch": 2.38,
"learning_rate": 1.0386998878750647e-05,
"loss": 1.428,
"step": 118000
},
{
"epoch": 2.39,
"learning_rate": 1.0219147179084336e-05,
"loss": 1.3898,
"step": 118500
},
{
"epoch": 2.4,
"learning_rate": 1.0051295479418025e-05,
"loss": 1.3816,
"step": 119000
},
{
"epoch": 2.41,
"learning_rate": 9.883443779751714e-06,
"loss": 1.4071,
"step": 119500
},
{
"epoch": 2.42,
"learning_rate": 9.715592080085404e-06,
"loss": 1.4318,
"step": 120000
},
{
"epoch": 2.43,
"learning_rate": 9.547740380419093e-06,
"loss": 1.4176,
"step": 120500
},
{
"epoch": 2.44,
"learning_rate": 9.37988868075278e-06,
"loss": 1.4008,
"step": 121000
},
{
"epoch": 2.45,
"learning_rate": 9.212036981086471e-06,
"loss": 1.3841,
"step": 121500
},
{
"epoch": 2.46,
"learning_rate": 9.04418528142016e-06,
"loss": 1.3971,
"step": 122000
},
{
"epoch": 2.47,
"learning_rate": 8.87633358175385e-06,
"loss": 1.4213,
"step": 122500
},
{
"epoch": 2.48,
"learning_rate": 8.708481882087537e-06,
"loss": 1.4593,
"step": 123000
},
{
"epoch": 2.49,
"learning_rate": 8.540630182421228e-06,
"loss": 1.4426,
"step": 123500
},
{
"epoch": 2.5,
"learning_rate": 8.372778482754917e-06,
"loss": 1.4344,
"step": 124000
},
{
"epoch": 2.51,
"learning_rate": 8.204926783088605e-06,
"loss": 1.4315,
"step": 124500
},
{
"epoch": 2.52,
"learning_rate": 8.037075083422294e-06,
"loss": 1.3943,
"step": 125000
},
{
"epoch": 2.53,
"learning_rate": 7.869223383755985e-06,
"loss": 1.4208,
"step": 125500
},
{
"epoch": 2.54,
"learning_rate": 7.701371684089674e-06,
"loss": 1.3591,
"step": 126000
},
{
"epoch": 2.55,
"learning_rate": 7.533519984423362e-06,
"loss": 1.3961,
"step": 126500
},
{
"epoch": 2.56,
"learning_rate": 7.365668284757052e-06,
"loss": 1.4032,
"step": 127000
},
{
"epoch": 2.57,
"learning_rate": 7.197816585090741e-06,
"loss": 1.4005,
"step": 127500
},
{
"epoch": 2.58,
"learning_rate": 7.02996488542443e-06,
"loss": 1.3813,
"step": 128000
},
{
"epoch": 2.59,
"learning_rate": 6.862113185758119e-06,
"loss": 1.3699,
"step": 128500
},
{
"epoch": 2.6,
"learning_rate": 6.694261486091809e-06,
"loss": 1.3379,
"step": 129000
},
{
"epoch": 2.61,
"learning_rate": 6.526409786425498e-06,
"loss": 1.4578,
"step": 129500
},
{
"epoch": 2.62,
"learning_rate": 6.3585580867591865e-06,
"loss": 1.3731,
"step": 130000
},
{
"epoch": 2.63,
"learning_rate": 6.190706387092876e-06,
"loss": 1.3846,
"step": 130500
},
{
"epoch": 2.64,
"learning_rate": 6.022854687426565e-06,
"loss": 1.3794,
"step": 131000
},
{
"epoch": 2.65,
"learning_rate": 5.855002987760254e-06,
"loss": 1.3652,
"step": 131500
},
{
"epoch": 2.66,
"learning_rate": 5.6871512880939434e-06,
"loss": 1.3766,
"step": 132000
},
{
"epoch": 2.67,
"learning_rate": 5.519299588427633e-06,
"loss": 1.3596,
"step": 132500
},
{
"epoch": 2.68,
"learning_rate": 5.351447888761322e-06,
"loss": 1.3424,
"step": 133000
},
{
"epoch": 2.69,
"learning_rate": 5.183596189095011e-06,
"loss": 1.3692,
"step": 133500
},
{
"epoch": 2.7,
"learning_rate": 5.0157444894287e-06,
"loss": 1.3868,
"step": 134000
},
{
"epoch": 2.71,
"learning_rate": 4.8478927897623896e-06,
"loss": 1.3785,
"step": 134500
},
{
"epoch": 2.72,
"learning_rate": 4.680041090096079e-06,
"loss": 1.4073,
"step": 135000
},
{
"epoch": 2.73,
"learning_rate": 4.512189390429768e-06,
"loss": 1.4196,
"step": 135500
},
{
"epoch": 2.74,
"learning_rate": 4.344337690763456e-06,
"loss": 1.36,
"step": 136000
},
{
"epoch": 2.75,
"learning_rate": 4.1764859910971465e-06,
"loss": 1.3424,
"step": 136500
},
{
"epoch": 2.76,
"learning_rate": 4.008634291430835e-06,
"loss": 1.3667,
"step": 137000
},
{
"epoch": 2.77,
"learning_rate": 3.840782591764525e-06,
"loss": 1.3806,
"step": 137500
},
{
"epoch": 2.78,
"learning_rate": 3.6729308920982133e-06,
"loss": 1.3193,
"step": 138000
},
{
"epoch": 2.79,
"learning_rate": 3.505079192431903e-06,
"loss": 1.3346,
"step": 138500
},
{
"epoch": 2.8,
"learning_rate": 3.3372274927655918e-06,
"loss": 1.3369,
"step": 139000
},
{
"epoch": 2.81,
"learning_rate": 3.169375793099281e-06,
"loss": 1.3824,
"step": 139500
},
{
"epoch": 2.82,
"learning_rate": 3.0015240934329702e-06,
"loss": 1.3799,
"step": 140000
},
{
"epoch": 2.83,
"learning_rate": 2.8336723937666595e-06,
"loss": 1.3645,
"step": 140500
},
{
"epoch": 2.84,
"learning_rate": 2.6658206941003487e-06,
"loss": 1.343,
"step": 141000
},
{
"epoch": 2.85,
"learning_rate": 2.497968994434038e-06,
"loss": 1.3412,
"step": 141500
},
{
"epoch": 2.86,
"learning_rate": 2.330117294767727e-06,
"loss": 1.3874,
"step": 142000
},
{
"epoch": 2.87,
"learning_rate": 2.1622655951014164e-06,
"loss": 1.4018,
"step": 142500
},
{
"epoch": 2.88,
"learning_rate": 1.9944138954351056e-06,
"loss": 1.3285,
"step": 143000
},
{
"epoch": 2.89,
"learning_rate": 1.8265621957687942e-06,
"loss": 1.349,
"step": 143500
},
{
"epoch": 2.9,
"learning_rate": 1.6587104961024834e-06,
"loss": 1.3412,
"step": 144000
},
{
"epoch": 2.91,
"learning_rate": 1.4908587964361727e-06,
"loss": 1.4176,
"step": 144500
},
{
"epoch": 2.92,
"learning_rate": 1.3230070967698619e-06,
"loss": 1.3716,
"step": 145000
},
{
"epoch": 2.93,
"learning_rate": 1.1551553971035511e-06,
"loss": 1.3065,
"step": 145500
},
{
"epoch": 2.94,
"learning_rate": 9.873036974372403e-07,
"loss": 1.377,
"step": 146000
},
{
"epoch": 2.95,
"learning_rate": 8.194519977709295e-07,
"loss": 1.3855,
"step": 146500
},
{
"epoch": 2.96,
"learning_rate": 6.516002981046186e-07,
"loss": 1.3627,
"step": 147000
},
{
"epoch": 2.97,
"learning_rate": 4.837485984383078e-07,
"loss": 1.3446,
"step": 147500
},
{
"epoch": 2.98,
"learning_rate": 3.15896898771997e-07,
"loss": 1.3152,
"step": 148000
},
{
"epoch": 2.99,
"learning_rate": 1.4804519910568613e-07,
"loss": 1.3785,
"step": 148500
}
],
"max_steps": 148941,
"num_train_epochs": 3,
"total_flos": 41584533903906840,
"trial_name": null,
"trial_params": null
}