{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 212490,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 4.9882347404583746e-05,
      "loss": 2.9231,
      "step": 500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9764694809167496e-05,
      "loss": 2.1627,
      "step": 1000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.964704221375124e-05,
      "loss": 1.8699,
      "step": 1500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9529389618334984e-05,
      "loss": 1.7267,
      "step": 2000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.941173702291873e-05,
      "loss": 1.6276,
      "step": 2500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.929408442750247e-05,
      "loss": 1.5576,
      "step": 3000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9176431832086214e-05,
      "loss": 1.5107,
      "step": 3500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.905877923666996e-05,
      "loss": 1.4644,
      "step": 4000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.894112664125371e-05,
      "loss": 1.4061,
      "step": 4500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.882347404583745e-05,
      "loss": 1.3747,
      "step": 5000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.8705821450421196e-05,
      "loss": 1.3601,
      "step": 5500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.8588168855004946e-05,
      "loss": 1.3315,
      "step": 6000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.847051625958869e-05,
      "loss": 1.2922,
      "step": 6500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.835286366417243e-05,
      "loss": 1.2699,
      "step": 7000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.8235211068756184e-05,
      "loss": 1.2293,
      "step": 7500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.811755847333993e-05,
      "loss": 1.227,
      "step": 8000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.799990587792367e-05,
      "loss": 1.2126,
      "step": 8500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.7882253282507415e-05,
      "loss": 1.1798,
      "step": 9000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.776460068709116e-05,
      "loss": 1.1901,
      "step": 9500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.76469480916749e-05,
      "loss": 1.1668,
      "step": 10000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.752929549625865e-05,
      "loss": 1.1431,
      "step": 10500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.7411642900842396e-05,
      "loss": 1.1699,
      "step": 11000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.729399030542614e-05,
      "loss": 1.1271,
      "step": 11500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.717633771000988e-05,
      "loss": 1.1251,
      "step": 12000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.7058685114593633e-05,
      "loss": 1.0963,
      "step": 12500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.694103251917738e-05,
      "loss": 1.1141,
      "step": 13000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.682337992376112e-05,
      "loss": 1.0974,
      "step": 13500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.6705727328344864e-05,
      "loss": 1.0829,
      "step": 14000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.658807473292861e-05,
      "loss": 1.0893,
      "step": 14500
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.647042213751235e-05,
      "loss": 1.0835,
      "step": 15000
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.63527695420961e-05,
      "loss": 1.0583,
      "step": 15500
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.6235116946679846e-05,
      "loss": 1.0708,
      "step": 16000
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.611746435126359e-05,
      "loss": 1.0695,
      "step": 16500
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.599981175584734e-05,
      "loss": 1.0373,
      "step": 17000
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.588215916043108e-05,
      "loss": 1.02,
      "step": 17500
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.576450656501483e-05,
      "loss": 1.0366,
      "step": 18000
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.564685396959858e-05,
      "loss": 1.0232,
      "step": 18500
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.552920137418232e-05,
      "loss": 0.9946,
      "step": 19000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.541154877876606e-05,
      "loss": 0.9895,
      "step": 19500
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.52938961833498e-05,
      "loss": 1.0122,
      "step": 20000
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.517624358793355e-05,
      "loss": 1.0048,
      "step": 20500
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.5058590992517295e-05,
      "loss": 0.9837,
      "step": 21000
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.494093839710104e-05,
      "loss": 0.9891,
      "step": 21500
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.482328580168479e-05,
      "loss": 0.9838,
      "step": 22000
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.470563320626853e-05,
      "loss": 0.9881,
      "step": 22500
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.4587980610852277e-05,
      "loss": 0.9387,
      "step": 23000
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.447032801543603e-05,
      "loss": 0.9741,
      "step": 23500
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.435267542001977e-05,
      "loss": 0.9668,
      "step": 24000
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.4235022824603514e-05,
      "loss": 0.9816,
      "step": 24500
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.411737022918726e-05,
      "loss": 0.9479,
      "step": 25000
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.3999717633771e-05,
      "loss": 0.9568,
      "step": 25500
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.3882065038354745e-05,
      "loss": 0.9476,
      "step": 26000
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.3764412442938495e-05,
      "loss": 0.9533,
      "step": 26500
    },
    {
      "epoch": 0.38,
      "learning_rate": 4.364675984752224e-05,
      "loss": 0.9282,
      "step": 27000
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.352910725210598e-05,
      "loss": 0.9454,
      "step": 27500
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.3411454656689726e-05,
      "loss": 0.9418,
      "step": 28000
    },
    {
      "epoch": 0.4,
      "learning_rate": 4.329380206127348e-05,
      "loss": 0.9214,
      "step": 28500
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.317614946585722e-05,
      "loss": 0.9193,
      "step": 29000
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.3058496870440964e-05,
      "loss": 0.8905,
      "step": 29500
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.294084427502471e-05,
      "loss": 0.9192,
      "step": 30000
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.282319167960845e-05,
      "loss": 0.9106,
      "step": 30500
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.2705539084192195e-05,
      "loss": 0.8986,
      "step": 31000
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.2587886488775945e-05,
      "loss": 0.9049,
      "step": 31500
    },
    {
      "epoch": 0.45,
      "learning_rate": 4.247023389335969e-05,
      "loss": 0.8943,
      "step": 32000
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.235258129794343e-05,
      "loss": 0.8912,
      "step": 32500
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.223492870252718e-05,
      "loss": 0.8812,
      "step": 33000
    },
    {
      "epoch": 0.47,
      "learning_rate": 4.2117276107110926e-05,
      "loss": 0.8942,
      "step": 33500
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.199962351169467e-05,
      "loss": 0.8956,
      "step": 34000
    },
    {
      "epoch": 0.49,
      "learning_rate": 4.1881970916278414e-05,
      "loss": 0.8793,
      "step": 34500
    },
    {
      "epoch": 0.49,
      "learning_rate": 4.1764318320862164e-05,
      "loss": 0.8793,
      "step": 35000
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.164666572544591e-05,
      "loss": 0.8847,
      "step": 35500
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.152901313002965e-05,
      "loss": 0.886,
      "step": 36000
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.1411360534613395e-05,
      "loss": 0.8677,
      "step": 36500
    },
    {
      "epoch": 0.52,
      "learning_rate": 4.129370793919714e-05,
      "loss": 0.8751,
      "step": 37000
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.117605534378088e-05,
      "loss": 0.8672,
      "step": 37500
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.105840274836463e-05,
      "loss": 0.8774,
      "step": 38000
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.0940750152948376e-05,
      "loss": 0.8735,
      "step": 38500
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.082309755753212e-05,
      "loss": 0.8639,
      "step": 39000
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.070544496211587e-05,
      "loss": 0.8609,
      "step": 39500
    },
    {
      "epoch": 0.56,
      "learning_rate": 4.0587792366699614e-05,
      "loss": 0.8669,
      "step": 40000
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.047013977128336e-05,
      "loss": 0.8662,
      "step": 40500
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.03524871758671e-05,
      "loss": 0.8522,
      "step": 41000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.0234834580450845e-05,
      "loss": 0.8609,
      "step": 41500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.011718198503459e-05,
      "loss": 0.8459,
      "step": 42000
    },
    {
      "epoch": 0.6,
      "learning_rate": 3.999952938961833e-05,
      "loss": 0.8384,
      "step": 42500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.988187679420208e-05,
      "loss": 0.8335,
      "step": 43000
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.9764224198785826e-05,
      "loss": 0.8567,
      "step": 43500
    },
    {
      "epoch": 0.62,
      "learning_rate": 3.964657160336957e-05,
      "loss": 0.8567,
      "step": 44000
    },
    {
      "epoch": 0.63,
      "learning_rate": 3.952891900795332e-05,
      "loss": 0.8256,
      "step": 44500
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.9411266412537063e-05,
      "loss": 0.8289,
      "step": 45000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.929361381712081e-05,
      "loss": 0.8406,
      "step": 45500
    },
    {
      "epoch": 0.65,
      "learning_rate": 3.917596122170456e-05,
      "loss": 0.8277,
      "step": 46000
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.90583086262883e-05,
      "loss": 0.807,
      "step": 46500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.8940656030872045e-05,
      "loss": 0.8062,
      "step": 47000
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.882300343545579e-05,
      "loss": 0.825,
      "step": 47500
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.870535084003953e-05,
      "loss": 0.8083,
      "step": 48000
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.8587698244623275e-05,
      "loss": 0.8222,
      "step": 48500
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.8470045649207026e-05,
      "loss": 0.8262,
      "step": 49000
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.835239305379077e-05,
      "loss": 0.8231,
      "step": 49500
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.823474045837451e-05,
      "loss": 0.8154,
      "step": 50000
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.811708786295826e-05,
      "loss": 0.8021,
      "step": 50500
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.799943526754201e-05,
      "loss": 0.8163,
      "step": 51000
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.788178267212575e-05,
      "loss": 0.8021,
      "step": 51500
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.7764130076709494e-05,
      "loss": 0.7975,
      "step": 52000
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.764647748129324e-05,
      "loss": 0.7899,
      "step": 52500
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.752882488587698e-05,
      "loss": 0.81,
      "step": 53000
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.7411172290460725e-05,
      "loss": 0.8023,
      "step": 53500
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.7293519695044476e-05,
      "loss": 0.7946,
      "step": 54000
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.717586709962822e-05,
      "loss": 0.8181,
      "step": 54500
    },
    {
      "epoch": 0.78,
      "learning_rate": 3.705821450421196e-05,
      "loss": 0.8046,
      "step": 55000
    },
    {
      "epoch": 0.78,
      "learning_rate": 3.694056190879571e-05,
      "loss": 0.809,
      "step": 55500
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.682290931337946e-05,
      "loss": 0.8072,
      "step": 56000
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.67052567179632e-05,
      "loss": 0.7868,
      "step": 56500
    },
    {
      "epoch": 0.8,
      "learning_rate": 3.6587604122546944e-05,
      "loss": 0.7887,
      "step": 57000
    },
    {
      "epoch": 0.81,
      "learning_rate": 3.6469951527130694e-05,
      "loss": 0.7829,
      "step": 57500
    },
    {
      "epoch": 0.82,
      "learning_rate": 3.635229893171443e-05,
      "loss": 0.7685,
      "step": 58000
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.6234646336298175e-05,
      "loss": 0.7888,
      "step": 58500
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.6116993740881925e-05,
      "loss": 0.791,
      "step": 59000
    },
    {
      "epoch": 0.84,
      "learning_rate": 3.599934114546567e-05,
      "loss": 0.7961,
      "step": 59500
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.588168855004941e-05,
      "loss": 0.7733,
      "step": 60000
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.576403595463316e-05,
      "loss": 0.7765,
      "step": 60500
    },
    {
      "epoch": 0.86,
      "learning_rate": 3.5646383359216907e-05,
      "loss": 0.7793,
      "step": 61000
    },
    {
      "epoch": 0.87,
      "learning_rate": 3.552873076380065e-05,
      "loss": 0.7985,
      "step": 61500
    },
    {
      "epoch": 0.88,
      "learning_rate": 3.54110781683844e-05,
      "loss": 0.7671,
      "step": 62000
    },
    {
      "epoch": 0.88,
      "learning_rate": 3.5293425572968144e-05,
      "loss": 0.779,
      "step": 62500
    },
    {
      "epoch": 0.89,
      "learning_rate": 3.517577297755189e-05,
      "loss": 0.7888,
      "step": 63000
    },
    {
      "epoch": 0.9,
      "learning_rate": 3.505812038213563e-05,
      "loss": 0.7731,
      "step": 63500
    },
    {
      "epoch": 0.9,
      "learning_rate": 3.4940467786719375e-05,
      "loss": 0.783,
      "step": 64000
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.482281519130312e-05,
      "loss": 0.7639,
      "step": 64500
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.470516259588686e-05,
      "loss": 0.7659,
      "step": 65000
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.458751000047061e-05,
      "loss": 0.7667,
      "step": 65500
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.4469857405054356e-05,
      "loss": 0.7564,
      "step": 66000
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.43522048096381e-05,
      "loss": 0.7697,
      "step": 66500
    },
    {
      "epoch": 0.95,
      "learning_rate": 3.423455221422185e-05,
      "loss": 0.765,
      "step": 67000
    },
    {
      "epoch": 0.95,
      "learning_rate": 3.4116899618805594e-05,
      "loss": 0.7538,
      "step": 67500
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.399924702338934e-05,
      "loss": 0.7685,
      "step": 68000
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.388159442797309e-05,
      "loss": 0.7635,
      "step": 68500
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.3763941832556825e-05,
      "loss": 0.7547,
      "step": 69000
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.364628923714057e-05,
      "loss": 0.7571,
      "step": 69500
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.352863664172432e-05,
      "loss": 0.7492,
      "step": 70000
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.341098404630806e-05,
      "loss": 0.7469,
      "step": 70500
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.3293331450891806e-05,
      "loss": 0.7685,
      "step": 71000
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.3175678855475556e-05,
      "loss": 0.7542,
      "step": 71500
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.30580262600593e-05,
      "loss": 0.7503,
      "step": 72000
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.2940373664643044e-05,
      "loss": 0.7487,
      "step": 72500
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.282272106922679e-05,
      "loss": 0.7496,
      "step": 73000
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.270506847381054e-05,
      "loss": 0.7461,
      "step": 73500
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.258741587839428e-05,
      "loss": 0.7283,
      "step": 74000
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.2469763282978025e-05,
      "loss": 0.7378,
      "step": 74500
    },
    {
      "epoch": 1.06,
      "learning_rate": 3.235211068756177e-05,
      "loss": 0.731,
      "step": 75000
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.223445809214551e-05,
      "loss": 0.7312,
      "step": 75500
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.2116805496729256e-05,
      "loss": 0.7316,
      "step": 76000
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.1999152901313006e-05,
      "loss": 0.7235,
      "step": 76500
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.188150030589675e-05,
      "loss": 0.736,
      "step": 77000
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.176384771048049e-05,
      "loss": 0.7389,
      "step": 77500
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.1646195115064244e-05,
      "loss": 0.7232,
      "step": 78000
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.152854251964799e-05,
      "loss": 0.744,
      "step": 78500
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.141088992423173e-05,
      "loss": 0.734,
      "step": 79000
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.1293237328815475e-05,
      "loss": 0.7253,
      "step": 79500
    },
    {
      "epoch": 1.13,
      "learning_rate": 3.117558473339922e-05,
      "loss": 0.7222,
      "step": 80000
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.105793213798296e-05,
      "loss": 0.7347,
      "step": 80500
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.0940279542566705e-05,
      "loss": 0.7022,
      "step": 81000
    },
    {
      "epoch": 1.15,
      "learning_rate": 3.0822626947150456e-05,
      "loss": 0.7324,
      "step": 81500
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.07049743517342e-05,
      "loss": 0.7156,
      "step": 82000
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.058732175631794e-05,
      "loss": 0.7259,
      "step": 82500
    },
    {
      "epoch": 1.17,
      "learning_rate": 3.0469669160901693e-05,
      "loss": 0.7161,
      "step": 83000
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.0352016565485437e-05,
      "loss": 0.7265,
      "step": 83500
    },
    {
      "epoch": 1.19,
      "learning_rate": 3.023436397006918e-05,
      "loss": 0.729,
      "step": 84000
    },
    {
      "epoch": 1.19,
      "learning_rate": 3.0116711374652928e-05,
      "loss": 0.7185,
      "step": 84500
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.999905877923667e-05,
      "loss": 0.7164,
      "step": 85000
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.9881406183820415e-05,
      "loss": 0.6989,
      "step": 85500
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.9763753588404165e-05,
      "loss": 0.7204,
      "step": 86000
    },
    {
      "epoch": 1.22,
      "learning_rate": 2.964610099298791e-05,
      "loss": 0.7228,
      "step": 86500
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.952844839757165e-05,
      "loss": 0.7076,
      "step": 87000
    },
    {
      "epoch": 1.24,
      "learning_rate": 2.9410795802155393e-05,
      "loss": 0.7198,
      "step": 87500
    },
    {
      "epoch": 1.24,
      "learning_rate": 2.9293143206739143e-05,
      "loss": 0.7035,
      "step": 88000
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.9175490611322887e-05,
      "loss": 0.707,
      "step": 88500
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.905783801590663e-05,
      "loss": 0.7194,
      "step": 89000
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.8940185420490377e-05,
      "loss": 0.6997,
      "step": 89500
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.882253282507412e-05,
      "loss": 0.714,
      "step": 90000
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.8704880229657865e-05,
      "loss": 0.6986,
      "step": 90500
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.8587227634241615e-05,
      "loss": 0.6982,
      "step": 91000
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.846957503882536e-05,
      "loss": 0.6838,
      "step": 91500
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.8351922443409102e-05,
      "loss": 0.711,
      "step": 92000
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.823426984799285e-05,
      "loss": 0.7111,
      "step": 92500
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.8116617252576593e-05,
      "loss": 0.7183,
      "step": 93000
    },
    {
      "epoch": 1.32,
      "learning_rate": 2.7998964657160337e-05,
      "loss": 0.7133,
      "step": 93500
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.7881312061744087e-05,
      "loss": 0.717,
      "step": 94000
    },
    {
      "epoch": 1.33,
      "learning_rate": 2.776365946632783e-05,
      "loss": 0.7153,
      "step": 94500
    },
    {
      "epoch": 1.34,
      "learning_rate": 2.7646006870911574e-05,
      "loss": 0.7085,
      "step": 95000
    },
    {
      "epoch": 1.35,
      "learning_rate": 2.7528354275495318e-05,
      "loss": 0.7139,
      "step": 95500
    },
    {
      "epoch": 1.36,
      "learning_rate": 2.7410701680079065e-05,
      "loss": 0.6945,
      "step": 96000
    },
    {
      "epoch": 1.36,
      "learning_rate": 2.729304908466281e-05,
      "loss": 0.7022,
      "step": 96500
    },
    {
      "epoch": 1.37,
      "learning_rate": 2.7175396489246552e-05,
      "loss": 0.689,
      "step": 97000
    },
    {
      "epoch": 1.38,
      "learning_rate": 2.7057743893830302e-05,
      "loss": 0.6878,
      "step": 97500
    },
    {
      "epoch": 1.38,
      "learning_rate": 2.6940091298414043e-05,
      "loss": 0.6872,
      "step": 98000
    },
    {
      "epoch": 1.39,
      "learning_rate": 2.6822438702997786e-05,
      "loss": 0.6927,
      "step": 98500
    },
    {
      "epoch": 1.4,
      "learning_rate": 2.6704786107581537e-05,
      "loss": 0.6944,
      "step": 99000
    },
    {
      "epoch": 1.4,
      "learning_rate": 2.658713351216528e-05,
      "loss": 0.6956,
      "step": 99500
    },
    {
      "epoch": 1.41,
      "learning_rate": 2.6469480916749024e-05,
      "loss": 0.6888,
      "step": 100000
    },
    {
      "epoch": 1.42,
      "learning_rate": 2.635182832133277e-05,
      "loss": 0.6881,
      "step": 100500
    },
    {
      "epoch": 1.43,
      "learning_rate": 2.6234175725916514e-05,
      "loss": 0.6932,
      "step": 101000
    },
    {
      "epoch": 1.43,
      "learning_rate": 2.6116523130500258e-05,
      "loss": 0.691,
      "step": 101500
    },
    {
      "epoch": 1.44,
      "learning_rate": 2.599887053508401e-05,
      "loss": 0.6786,
      "step": 102000
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.5881217939667752e-05,
      "loss": 0.6939,
      "step": 102500
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.5763565344251496e-05,
      "loss": 0.6899,
      "step": 103000
    },
    {
      "epoch": 1.46,
      "learning_rate": 2.564591274883524e-05,
      "loss": 0.694,
      "step": 103500
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.5528260153418986e-05,
      "loss": 0.6734,
      "step": 104000
    },
    {
      "epoch": 1.48,
      "learning_rate": 2.541060755800273e-05,
      "loss": 0.6901,
      "step": 104500
    },
    {
      "epoch": 1.48,
      "learning_rate": 2.5292954962586474e-05,
      "loss": 0.6726,
      "step": 105000
    },
    {
      "epoch": 1.49,
      "learning_rate": 2.5175302367170224e-05,
      "loss": 0.6855,
      "step": 105500
    },
    {
      "epoch": 1.5,
      "learning_rate": 2.5057649771753968e-05,
      "loss": 0.6883,
      "step": 106000
    },
    {
      "epoch": 1.5,
      "learning_rate": 2.493999717633771e-05,
      "loss": 0.6785,
      "step": 106500
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.4822344580921455e-05,
      "loss": 0.6753,
      "step": 107000
    },
    {
      "epoch": 1.52,
      "learning_rate": 2.4704691985505202e-05,
      "loss": 0.6728,
      "step": 107500
    },
    {
      "epoch": 1.52,
      "learning_rate": 2.458703939008895e-05,
      "loss": 0.6804,
      "step": 108000
    },
    {
      "epoch": 1.53,
      "learning_rate": 2.4469386794672692e-05,
      "loss": 0.6792,
      "step": 108500
    },
    {
      "epoch": 1.54,
      "learning_rate": 2.4351734199256436e-05,
      "loss": 0.6722,
      "step": 109000
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.423408160384018e-05,
      "loss": 0.6629,
      "step": 109500
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.4116429008423927e-05,
      "loss": 0.664,
      "step": 110000
    },
    {
      "epoch": 1.56,
      "learning_rate": 2.3998776413007674e-05,
      "loss": 0.6612,
      "step": 110500
    },
    {
      "epoch": 1.57,
      "learning_rate": 2.3881123817591417e-05,
      "loss": 0.6662,
      "step": 111000
    },
    {
      "epoch": 1.57,
      "learning_rate": 2.3763471222175164e-05,
      "loss": 0.6791,
      "step": 111500
    },
    {
      "epoch": 1.58,
      "learning_rate": 2.3645818626758908e-05,
      "loss": 0.6658,
      "step": 112000
    },
    {
      "epoch": 1.59,
      "learning_rate": 2.352816603134265e-05,
      "loss": 0.6676,
      "step": 112500
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.34105134359264e-05,
      "loss": 0.6798,
      "step": 113000
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.3292860840510142e-05,
      "loss": 0.6786,
      "step": 113500
    },
    {
      "epoch": 1.61,
      "learning_rate": 2.317520824509389e-05,
      "loss": 0.661,
      "step": 114000
    },
    {
      "epoch": 1.62,
      "learning_rate": 2.3057555649677633e-05,
      "loss": 0.6662,
      "step": 114500
    },
    {
      "epoch": 1.62,
      "learning_rate": 2.2939903054261376e-05,
      "loss": 0.6649,
      "step": 115000
    },
    {
      "epoch": 1.63,
      "learning_rate": 2.2822250458845123e-05,
      "loss": 0.657,
      "step": 115500
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.2704597863428867e-05,
      "loss": 0.6664,
      "step": 116000
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.2586945268012614e-05,
      "loss": 0.6765,
      "step": 116500
    },
    {
      "epoch": 1.65,
      "learning_rate": 2.2469292672596358e-05,
      "loss": 0.664,
      "step": 117000
    },
    {
      "epoch": 1.66,
      "learning_rate": 2.23516400771801e-05,
      "loss": 0.6664,
      "step": 117500
    },
    {
      "epoch": 1.67,
      "learning_rate": 2.2233987481763848e-05,
      "loss": 0.6532,
      "step": 118000
    },
    {
      "epoch": 1.67,
      "learning_rate": 2.2116334886347595e-05,
      "loss": 0.6687,
      "step": 118500
    },
    {
      "epoch": 1.68,
      "learning_rate": 2.199868229093134e-05,
      "loss": 0.6621,
      "step": 119000
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.1881029695515086e-05,
      "loss": 0.6671,
      "step": 119500
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.176337710009883e-05,
      "loss": 0.6619,
      "step": 120000
    },
    {
      "epoch": 1.7,
      "learning_rate": 2.1645724504682573e-05,
      "loss": 0.6549,
      "step": 120500
    },
    {
      "epoch": 1.71,
      "learning_rate": 2.152807190926632e-05,
      "loss": 0.6668,
      "step": 121000
    },
    {
      "epoch": 1.72,
      "learning_rate": 2.1410419313850064e-05,
      "loss": 0.6439,
      "step": 121500
    },
    {
      "epoch": 1.72,
      "learning_rate": 2.129276671843381e-05,
      "loss": 0.6554,
      "step": 122000
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.1175114123017554e-05,
      "loss": 0.6395,
      "step": 122500
    },
    {
      "epoch": 1.74,
      "learning_rate": 2.1057461527601298e-05,
      "loss": 0.6482,
      "step": 123000
    },
    {
      "epoch": 1.74,
      "learning_rate": 2.0939808932185045e-05,
      "loss": 0.6607,
      "step": 123500
    },
    {
      "epoch": 1.75,
      "learning_rate": 2.082215633676879e-05,
      "loss": 0.6508,
      "step": 124000
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.0704503741352536e-05,
      "loss": 0.6438,
      "step": 124500
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.0586851145936283e-05,
      "loss": 0.658,
      "step": 125000
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.0469198550520026e-05,
      "loss": 0.6409,
      "step": 125500
    },
    {
      "epoch": 1.78,
      "learning_rate": 2.035154595510377e-05,
      "loss": 0.6439,
      "step": 126000
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.0233893359687517e-05,
      "loss": 0.6674,
      "step": 126500
    },
    {
      "epoch": 1.79,
      "learning_rate": 2.011624076427126e-05,
      "loss": 0.66,
      "step": 127000
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.9998588168855007e-05,
      "loss": 0.6576,
      "step": 127500
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.988093557343875e-05,
      "loss": 0.6445,
      "step": 128000
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.9763282978022495e-05,
      "loss": 0.6542,
      "step": 128500
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.964563038260624e-05,
      "loss": 0.6412,
      "step": 129000
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.9527977787189985e-05,
      "loss": 0.6496,
      "step": 129500
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.9410325191773732e-05,
      "loss": 0.6423,
      "step": 130000
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.929267259635748e-05,
      "loss": 0.6437,
      "step": 130500
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.917502000094122e-05,
      "loss": 0.6467,
      "step": 131000
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.9057367405524967e-05,
      "loss": 0.6481,
      "step": 131500
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.893971481010871e-05,
      "loss": 0.6442,
      "step": 132000
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.8822062214692457e-05,
      "loss": 0.6345,
      "step": 132500
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.8704409619276204e-05,
      "loss": 0.642,
      "step": 133000
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.8586757023859948e-05,
      "loss": 0.6373,
      "step": 133500
    },
    {
      "epoch": 1.89,
      "learning_rate": 1.846910442844369e-05,
      "loss": 0.6461,
      "step": 134000
    },
    {
      "epoch": 1.9,
      "learning_rate": 1.835145183302744e-05,
      "loss": 0.6429,
      "step": 134500
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.8233799237611182e-05,
      "loss": 0.6389,
      "step": 135000
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.811614664219493e-05,
      "loss": 0.6395,
      "step": 135500
    },
    {
      "epoch": 1.92,
      "learning_rate": 1.7998494046778673e-05,
      "loss": 0.6502,
      "step": 136000
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.7880841451362416e-05,
      "loss": 0.6417,
      "step": 136500
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.7763188855946163e-05,
      "loss": 0.6376,
      "step": 137000
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.7645536260529907e-05,
      "loss": 0.6368,
      "step": 137500
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.7527883665113654e-05,
      "loss": 0.638,
      "step": 138000
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.74102310696974e-05,
      "loss": 0.6508,
      "step": 138500
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.7292578474281145e-05,
      "loss": 0.6311,
      "step": 139000
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.7174925878864888e-05,
      "loss": 0.641,
      "step": 139500
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.7057273283448632e-05,
      "loss": 0.638,
      "step": 140000
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.693962068803238e-05,
      "loss": 0.6176,
      "step": 140500
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.6821968092616126e-05,
      "loss": 0.6346,
      "step": 141000
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.670431549719987e-05,
      "loss": 0.6348,
      "step": 141500
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.6586662901783613e-05,
      "loss": 0.6454,
      "step": 142000
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.6469010306367357e-05,
      "loss": 0.6237,
      "step": 142500
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.6351357710951104e-05,
      "loss": 0.622,
      "step": 143000
    },
    {
      "epoch": 2.03,
      "learning_rate": 1.623370511553485e-05,
      "loss": 0.6112,
      "step": 143500
    },
    {
      "epoch": 2.03,
      "learning_rate": 1.6116052520118594e-05,
      "loss": 0.6288,
      "step": 144000
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.599839992470234e-05,
      "loss": 0.641,
      "step": 144500
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.5880747329286085e-05,
      "loss": 0.6298,
      "step": 145000
    },
    {
      "epoch": 2.05,
      "learning_rate": 1.576309473386983e-05,
      "loss": 0.6144,
      "step": 145500
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.5645442138453575e-05,
      "loss": 0.6261,
      "step": 146000
    },
    {
      "epoch": 2.07,
      "learning_rate": 1.552778954303732e-05,
      "loss": 0.6195,
      "step": 146500
    },
    {
      "epoch": 2.08,
      "learning_rate": 1.5410136947621066e-05,
      "loss": 0.6249,
      "step": 147000
    },
    {
      "epoch": 2.08,
      "learning_rate": 1.529248435220481e-05,
      "loss": 0.6395,
      "step": 147500
    },
    {
      "epoch": 2.09,
      "learning_rate": 1.5174831756788555e-05,
      "loss": 0.6273,
      "step": 148000
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.50571791613723e-05,
      "loss": 0.6279,
      "step": 148500
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.4939526565956047e-05,
      "loss": 0.6157,
      "step": 149000
    },
    {
      "epoch": 2.11,
      "learning_rate": 1.4821873970539791e-05,
      "loss": 0.6238,
      "step": 149500
    },
    {
      "epoch": 2.12,
      "learning_rate": 1.4704221375123536e-05,
      "loss": 0.6222,
      "step": 150000
    },
    {
      "epoch": 2.12,
      "learning_rate": 1.458656877970728e-05,
      "loss": 0.6292,
      "step": 150500
    },
    {
      "epoch": 2.13,
      "learning_rate": 1.4468916184291025e-05,
      "loss": 0.6159,
      "step": 151000
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.4351263588874772e-05,
      "loss": 0.6197,
      "step": 151500
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.4233610993458516e-05,
      "loss": 0.6247,
      "step": 152000
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.4115958398042261e-05,
      "loss": 0.6152,
      "step": 152500
    },
    {
      "epoch": 2.16,
      "learning_rate": 1.3998305802626008e-05,
      "loss": 0.6279,
      "step": 153000
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.3880653207209752e-05,
      "loss": 0.6107,
      "step": 153500
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.3763000611793497e-05,
      "loss": 0.62,
      "step": 154000
    },
    {
      "epoch": 2.18,
      "learning_rate": 1.364534801637724e-05,
      "loss": 0.6009,
      "step": 154500
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.3527695420960988e-05,
      "loss": 0.6217,
      "step": 155000
    },
    {
      "epoch": 2.2,
      "learning_rate": 1.3410042825544733e-05,
      "loss": 0.613,
      "step": 155500
    },
    {
      "epoch": 2.2,
      "learning_rate": 1.3292390230128477e-05,
      "loss": 0.6257,
      "step": 156000
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.3174737634712222e-05,
      "loss": 0.6257,
      "step": 156500
    },
    {
      "epoch": 2.22,
      "learning_rate": 1.3057085039295969e-05,
      "loss": 0.6184,
      "step": 157000
    },
    {
      "epoch": 2.22,
      "learning_rate": 1.2939432443879713e-05,
      "loss": 0.6118,
      "step": 157500
    },
    {
      "epoch": 2.23,
      "learning_rate": 1.2821779848463458e-05,
      "loss": 0.5928,
      "step": 158000
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.2704127253047201e-05,
      "loss": 0.6092,
      "step": 158500
    },
    {
      "epoch": 2.24,
      "learning_rate": 1.2586474657630948e-05,
      "loss": 0.6055,
      "step": 159000
    },
    {
      "epoch": 2.25,
      "learning_rate": 1.2468822062214692e-05,
      "loss": 0.6102,
      "step": 159500
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.2351169466798439e-05,
      "loss": 0.6194,
      "step": 160000
    },
    {
      "epoch": 2.27,
      "learning_rate": 1.2233516871382183e-05,
      "loss": 0.6108,
      "step": 160500
    },
    {
      "epoch": 2.27,
      "learning_rate": 1.2115864275965928e-05,
      "loss": 0.6091,
      "step": 161000
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.1998211680549673e-05,
      "loss": 0.6019,
      "step": 161500
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.1880559085133419e-05,
      "loss": 0.6176,
      "step": 162000
    },
    {
      "epoch": 2.29,
      "learning_rate": 1.1762906489717164e-05,
      "loss": 0.6149,
      "step": 162500
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.164525389430091e-05,
      "loss": 0.5845,
      "step": 163000
    },
    {
      "epoch": 2.31,
      "learning_rate": 1.1527601298884653e-05,
      "loss": 0.6121,
      "step": 163500
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.14099487034684e-05,
      "loss": 0.6092,
      "step": 164000
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.1292296108052145e-05,
      "loss": 0.606,
      "step": 164500
    },
    {
      "epoch": 2.33,
      "learning_rate": 1.1174643512635889e-05,
      "loss": 0.6011,
      "step": 165000
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.1056990917219634e-05,
      "loss": 0.6015,
      "step": 165500
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.093933832180338e-05,
      "loss": 0.6132,
      "step": 166000
    },
    {
      "epoch": 2.35,
      "learning_rate": 1.0821685726387125e-05,
      "loss": 0.6025,
      "step": 166500
    },
    {
      "epoch": 2.36,
      "learning_rate": 1.070403313097087e-05,
      "loss": 0.6099,
      "step": 167000
    },
    {
      "epoch": 2.36,
      "learning_rate": 1.0586380535554614e-05,
      "loss": 0.6172,
      "step": 167500
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.046872794013836e-05,
      "loss": 0.6132,
      "step": 168000
    },
    {
      "epoch": 2.38,
      "learning_rate": 1.0351075344722106e-05,
      "loss": 0.5979,
      "step": 168500
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.023342274930585e-05,
      "loss": 0.6012,
      "step": 169000
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.0115770153889595e-05,
      "loss": 0.5921,
      "step": 169500
    },
    {
      "epoch": 2.4,
      "learning_rate": 9.998117558473342e-06,
      "loss": 0.6039,
      "step": 170000
    },
    {
      "epoch": 2.41,
      "learning_rate": 9.880464963057086e-06,
      "loss": 0.6064,
      "step": 170500
    },
    {
      "epoch": 2.41,
      "learning_rate": 9.76281236764083e-06,
      "loss": 0.6084,
      "step": 171000
    },
    {
      "epoch": 2.42,
      "learning_rate": 9.645159772224576e-06,
      "loss": 0.5933,
      "step": 171500
    },
    {
      "epoch": 2.43,
      "learning_rate": 9.52750717680832e-06,
      "loss": 0.6008,
      "step": 172000
    },
    {
      "epoch": 2.44,
      "learning_rate": 9.409854581392067e-06,
      "loss": 0.6048,
      "step": 172500
    },
    {
      "epoch": 2.44,
      "learning_rate": 9.29220198597581e-06,
      "loss": 0.5933,
      "step": 173000
    },
    {
      "epoch": 2.45,
      "learning_rate": 9.174549390559556e-06,
      "loss": 0.5908,
      "step": 173500
    },
    {
      "epoch": 2.46,
      "learning_rate": 9.056896795143301e-06,
      "loss": 0.5921,
      "step": 174000
    },
    {
      "epoch": 2.46,
      "learning_rate": 8.939244199727046e-06,
      "loss": 0.6012,
      "step": 174500
    },
    {
      "epoch": 2.47,
      "learning_rate": 8.821591604310792e-06,
      "loss": 0.5988,
      "step": 175000
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.703939008894537e-06,
      "loss": 0.6019,
      "step": 175500
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.58628641347828e-06,
      "loss": 0.5915,
      "step": 176000
    },
    {
      "epoch": 2.49,
      "learning_rate": 8.468633818062028e-06,
      "loss": 0.6104,
      "step": 176500
    },
    {
      "epoch": 2.5,
      "learning_rate": 8.350981222645773e-06,
      "loss": 0.6093,
      "step": 177000
    },
    {
      "epoch": 2.51,
      "learning_rate": 8.233328627229516e-06,
      "loss": 0.6084,
      "step": 177500
    },
    {
      "epoch": 2.51,
      "learning_rate": 8.115676031813262e-06,
      "loss": 0.5989,
      "step": 178000
    },
    {
      "epoch": 2.52,
      "learning_rate": 7.998023436397007e-06,
      "loss": 0.6088,
      "step": 178500
    },
    {
      "epoch": 2.53,
      "learning_rate": 7.880370840980752e-06,
      "loss": 0.5889,
      "step": 179000
    },
    {
      "epoch": 2.53,
      "learning_rate": 7.762718245564498e-06,
      "loss": 0.5819,
      "step": 179500
    },
    {
      "epoch": 2.54,
      "learning_rate": 7.645065650148241e-06,
      "loss": 0.5901,
      "step": 180000
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.527413054731988e-06,
      "loss": 0.58,
      "step": 180500
    },
    {
      "epoch": 2.56,
      "learning_rate": 7.409760459315733e-06,
      "loss": 0.5963,
      "step": 181000
    },
    {
      "epoch": 2.56,
      "learning_rate": 7.292107863899478e-06,
      "loss": 0.604,
      "step": 181500
    },
    {
      "epoch": 2.57,
      "learning_rate": 7.174455268483223e-06,
      "loss": 0.5984,
      "step": 182000
    },
    {
      "epoch": 2.58,
      "learning_rate": 7.056802673066969e-06,
      "loss": 0.591,
      "step": 182500
    },
    {
      "epoch": 2.58,
      "learning_rate": 6.939150077650713e-06,
      "loss": 0.5932,
      "step": 183000
    },
    {
      "epoch": 2.59,
      "learning_rate": 6.8214974822344585e-06,
      "loss": 0.5996,
      "step": 183500
    },
    {
      "epoch": 2.6,
      "learning_rate": 6.703844886818203e-06,
      "loss": 0.5987,
      "step": 184000
    },
    {
      "epoch": 2.6,
      "learning_rate": 6.586192291401949e-06,
      "loss": 0.5859,
      "step": 184500
    },
    {
      "epoch": 2.61,
      "learning_rate": 6.468539695985694e-06,
      "loss": 0.594,
      "step": 185000
    },
    {
      "epoch": 2.62,
      "learning_rate": 6.350887100569439e-06,
      "loss": 0.5836,
      "step": 185500
    },
    {
      "epoch": 2.63,
      "learning_rate": 6.233234505153184e-06,
      "loss": 0.5789,
      "step": 186000
    },
    {
      "epoch": 2.63,
      "learning_rate": 6.115581909736929e-06,
      "loss": 0.595,
      "step": 186500
    },
    {
      "epoch": 2.64,
      "learning_rate": 5.997929314320674e-06,
      "loss": 0.5817,
      "step": 187000
    },
    {
      "epoch": 2.65,
      "learning_rate": 5.880276718904419e-06,
      "loss": 0.5926,
      "step": 187500
    },
    {
      "epoch": 2.65,
      "learning_rate": 5.762624123488165e-06,
      "loss": 0.6029,
      "step": 188000
    },
    {
      "epoch": 2.66,
      "learning_rate": 5.644971528071909e-06,
      "loss": 0.5888,
      "step": 188500
    },
    {
      "epoch": 2.67,
      "learning_rate": 5.527318932655655e-06,
      "loss": 0.5843,
      "step": 189000
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.4096663372394e-06,
      "loss": 0.5794,
      "step": 189500
    },
    {
      "epoch": 2.68,
      "learning_rate": 5.292013741823145e-06,
      "loss": 0.5829,
      "step": 190000
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.1743611464068895e-06,
      "loss": 0.5903,
      "step": 190500
    },
    {
      "epoch": 2.7,
      "learning_rate": 5.056708550990636e-06,
      "loss": 0.5742,
      "step": 191000
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.93905595557438e-06,
      "loss": 0.5948,
      "step": 191500
    },
    {
      "epoch": 2.71,
      "learning_rate": 4.821403360158125e-06,
      "loss": 0.5807,
      "step": 192000
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.703750764741871e-06,
      "loss": 0.5824,
      "step": 192500
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.586098169325616e-06,
      "loss": 0.5818,
      "step": 193000
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.4684455739093605e-06,
      "loss": 0.5871,
      "step": 193500
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.350792978493106e-06,
      "loss": 0.5814,
      "step": 194000
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.233140383076851e-06,
      "loss": 0.5851,
      "step": 194500
    },
    {
      "epoch": 2.75,
      "learning_rate": 4.1154877876605964e-06,
      "loss": 0.5794,
      "step": 195000
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.997835192244341e-06,
      "loss": 0.5914,
      "step": 195500
    },
    {
      "epoch": 2.77,
      "learning_rate": 3.880182596828086e-06,
      "loss": 0.5792,
      "step": 196000
    },
    {
      "epoch": 2.77,
      "learning_rate": 3.762530001411831e-06,
      "loss": 0.5941,
      "step": 196500
    },
    {
      "epoch": 2.78,
      "learning_rate": 3.644877405995577e-06,
      "loss": 0.5673,
      "step": 197000
    },
    {
      "epoch": 2.79,
      "learning_rate": 3.5272248105793217e-06,
      "loss": 0.5818,
      "step": 197500
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.409572215163067e-06,
      "loss": 0.5828,
      "step": 198000
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.291919619746812e-06,
      "loss": 0.5798,
      "step": 198500
    },
    {
      "epoch": 2.81,
      "learning_rate": 3.1742670243305572e-06,
      "loss": 0.5714,
      "step": 199000
    },
    {
      "epoch": 2.82,
      "learning_rate": 3.056614428914302e-06,
      "loss": 0.5866,
      "step": 199500
    },
    {
      "epoch": 2.82,
      "learning_rate": 2.938961833498047e-06,
      "loss": 0.578,
      "step": 200000
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.8213092380817923e-06,
      "loss": 0.5924,
      "step": 200500
    },
    {
      "epoch": 2.84,
      "learning_rate": 2.703656642665537e-06,
      "loss": 0.5854,
      "step": 201000
    },
    {
      "epoch": 2.84,
      "learning_rate": 2.5860040472492825e-06,
      "loss": 0.5865,
      "step": 201500
    },
    {
      "epoch": 2.85,
      "learning_rate": 2.4683514518330274e-06,
      "loss": 0.5933,
      "step": 202000
    },
    {
      "epoch": 2.86,
      "learning_rate": 2.3506988564167727e-06,
      "loss": 0.5748,
      "step": 202500
    },
    {
      "epoch": 2.87,
      "learning_rate": 2.233046261000518e-06,
      "loss": 0.5913,
      "step": 203000
    },
    {
      "epoch": 2.87,
      "learning_rate": 2.115393665584263e-06,
      "loss": 0.5811,
      "step": 203500
    },
    {
      "epoch": 2.88,
      "learning_rate": 1.997741070168008e-06,
      "loss": 0.5835,
      "step": 204000
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.880088474751753e-06,
      "loss": 0.5836,
      "step": 204500
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.7624358793354984e-06,
      "loss": 0.5737,
      "step": 205000
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.6447832839192435e-06,
      "loss": 0.5813,
      "step": 205500
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.5271306885029886e-06,
      "loss": 0.5837,
      "step": 206000
    },
    {
      "epoch": 2.92,
      "learning_rate": 1.4094780930867337e-06,
      "loss": 0.5816,
      "step": 206500
    },
    {
      "epoch": 2.92,
      "learning_rate": 1.2918254976704786e-06,
      "loss": 0.5871,
      "step": 207000
    },
    {
      "epoch": 2.93,
      "learning_rate": 1.1741729022542237e-06,
      "loss": 0.5657,
      "step": 207500
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.0565203068379688e-06,
      "loss": 0.584,
      "step": 208000
    },
    {
      "epoch": 2.94,
      "learning_rate": 9.38867711421714e-07,
      "loss": 0.5672,
      "step": 208500
    },
    {
      "epoch": 2.95,
      "learning_rate": 8.212151160054591e-07,
      "loss": 0.5814,
      "step": 209000
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.035625205892043e-07,
      "loss": 0.579,
      "step": 209500
    },
    {
      "epoch": 2.96,
      "learning_rate": 5.859099251729494e-07,
      "loss": 0.5969,
      "step": 210000
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.6825732975669443e-07,
      "loss": 0.5796,
      "step": 210500
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.506047343404396e-07,
      "loss": 0.5699,
      "step": 211000
    },
    {
      "epoch": 2.99,
      "learning_rate": 2.3295213892418468e-07,
      "loss": 0.5749,
      "step": 211500
    },
    {
      "epoch": 2.99,
      "learning_rate": 1.152995435079298e-07,
      "loss": 0.5678,
      "step": 212000
    },
    {
      "epoch": 3.0,
      "step": 212490,
      "total_flos": 4.485693256017408e+17,
      "train_loss": 0.7508337134063662,
      "train_runtime": 150510.9458,
      "train_samples_per_second": 11.294,
      "train_steps_per_second": 1.412
    }
  ],
  "logging_steps": 500,
  "max_steps": 212490,
  "num_train_epochs": 3,
  "save_steps": 100000,
  "total_flos": 4.485693256017408e+17,
  "trial_name": null,
  "trial_params": null
}