|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.414368184733804, |
|
"global_step": 40000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.959910198845414e-05, |
|
"loss": 13.3635, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.919820397690827e-05, |
|
"loss": 3.169, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 0.662490725517273, |
|
"eval_runtime": 80.6872, |
|
"eval_samples_per_second": 62.922, |
|
"eval_steps_per_second": 31.467, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.8797305965362415e-05, |
|
"loss": 0.578, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.839640795381655e-05, |
|
"loss": 0.2929, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.18057043850421906, |
|
"eval_runtime": 80.3642, |
|
"eval_samples_per_second": 63.175, |
|
"eval_steps_per_second": 31.594, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.799550994227069e-05, |
|
"loss": 0.2481, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.759461193072483e-05, |
|
"loss": 0.2159, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.1447182148694992, |
|
"eval_runtime": 80.0634, |
|
"eval_samples_per_second": 63.412, |
|
"eval_steps_per_second": 31.712, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.719371391917896e-05, |
|
"loss": 0.2131, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6792815907633104e-05, |
|
"loss": 0.1955, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.13617677986621857, |
|
"eval_runtime": 80.8812, |
|
"eval_samples_per_second": 62.771, |
|
"eval_steps_per_second": 31.392, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.639191789608724e-05, |
|
"loss": 0.1799, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.5991019884541375e-05, |
|
"loss": 0.1785, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.12113407254219055, |
|
"eval_runtime": 81.0258, |
|
"eval_samples_per_second": 62.659, |
|
"eval_steps_per_second": 31.336, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.559012187299551e-05, |
|
"loss": 0.1664, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.5189223861449645e-05, |
|
"loss": 0.1661, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.1217775046825409, |
|
"eval_runtime": 80.4348, |
|
"eval_samples_per_second": 63.119, |
|
"eval_steps_per_second": 31.566, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.478832584990379e-05, |
|
"loss": 0.152, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.438742783835793e-05, |
|
"loss": 0.1429, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.11830633133649826, |
|
"eval_runtime": 80.5584, |
|
"eval_samples_per_second": 63.023, |
|
"eval_steps_per_second": 31.518, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.3986529826812064e-05, |
|
"loss": 0.1436, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.35856318152662e-05, |
|
"loss": 0.138, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 0.11041316390037537, |
|
"eval_runtime": 80.3972, |
|
"eval_samples_per_second": 63.149, |
|
"eval_steps_per_second": 31.581, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.3184733803720335e-05, |
|
"loss": 0.139, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.278383579217447e-05, |
|
"loss": 0.1328, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.11453410983085632, |
|
"eval_runtime": 80.9914, |
|
"eval_samples_per_second": 62.686, |
|
"eval_steps_per_second": 31.349, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.238293778062861e-05, |
|
"loss": 0.1336, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.198203976908275e-05, |
|
"loss": 0.1257, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.11141891777515411, |
|
"eval_runtime": 80.764, |
|
"eval_samples_per_second": 62.862, |
|
"eval_steps_per_second": 31.437, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.158114175753688e-05, |
|
"loss": 0.129, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.118024374599102e-05, |
|
"loss": 0.1283, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.10540181398391724, |
|
"eval_runtime": 80.8099, |
|
"eval_samples_per_second": 62.826, |
|
"eval_steps_per_second": 31.419, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.077934573444516e-05, |
|
"loss": 0.1329, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.03784477228993e-05, |
|
"loss": 0.1233, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 0.10773167759180069, |
|
"eval_runtime": 80.5271, |
|
"eval_samples_per_second": 63.047, |
|
"eval_steps_per_second": 31.53, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.9977549711353436e-05, |
|
"loss": 0.121, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.957665169980757e-05, |
|
"loss": 0.1022, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.1054486483335495, |
|
"eval_runtime": 80.727, |
|
"eval_samples_per_second": 62.891, |
|
"eval_steps_per_second": 31.452, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.917575368826171e-05, |
|
"loss": 0.113, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.877485567671584e-05, |
|
"loss": 0.1015, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 0.10687392950057983, |
|
"eval_runtime": 80.3483, |
|
"eval_samples_per_second": 63.187, |
|
"eval_steps_per_second": 31.6, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.8373957665169984e-05, |
|
"loss": 0.1052, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.797305965362412e-05, |
|
"loss": 0.1013, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 0.0990850031375885, |
|
"eval_runtime": 80.592, |
|
"eval_samples_per_second": 62.996, |
|
"eval_steps_per_second": 31.504, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.7572161642078254e-05, |
|
"loss": 0.1107, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.7171263630532396e-05, |
|
"loss": 0.1024, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 0.10092990845441818, |
|
"eval_runtime": 81.1078, |
|
"eval_samples_per_second": 62.596, |
|
"eval_steps_per_second": 31.304, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.677036561898653e-05, |
|
"loss": 0.0986, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 3.6369467607440674e-05, |
|
"loss": 0.1011, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 0.09937073290348053, |
|
"eval_runtime": 79.8753, |
|
"eval_samples_per_second": 63.562, |
|
"eval_steps_per_second": 31.787, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.596856959589481e-05, |
|
"loss": 0.1009, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.5567671584348944e-05, |
|
"loss": 0.1011, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.09842894226312637, |
|
"eval_runtime": 80.4877, |
|
"eval_samples_per_second": 63.078, |
|
"eval_steps_per_second": 31.545, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.516677357280308e-05, |
|
"loss": 0.1008, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.4765875561257214e-05, |
|
"loss": 0.0935, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 0.1000189483165741, |
|
"eval_runtime": 80.5915, |
|
"eval_samples_per_second": 62.997, |
|
"eval_steps_per_second": 31.505, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.436497754971135e-05, |
|
"loss": 0.0847, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.396407953816549e-05, |
|
"loss": 0.086, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 0.10353543609380722, |
|
"eval_runtime": 80.3489, |
|
"eval_samples_per_second": 63.187, |
|
"eval_steps_per_second": 31.6, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 3.3563181526619633e-05, |
|
"loss": 0.0858, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.316228351507377e-05, |
|
"loss": 0.0868, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 0.10058429092168808, |
|
"eval_runtime": 80.7731, |
|
"eval_samples_per_second": 62.855, |
|
"eval_steps_per_second": 31.434, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.2761385503527904e-05, |
|
"loss": 0.0822, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.236048749198204e-05, |
|
"loss": 0.0854, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_loss": 0.09908822923898697, |
|
"eval_runtime": 80.4062, |
|
"eval_samples_per_second": 63.142, |
|
"eval_steps_per_second": 31.577, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.195958948043618e-05, |
|
"loss": 0.0835, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.1558691468890316e-05, |
|
"loss": 0.0898, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_loss": 0.09145330637693405, |
|
"eval_runtime": 81.1442, |
|
"eval_samples_per_second": 62.568, |
|
"eval_steps_per_second": 31.29, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 3.115779345734445e-05, |
|
"loss": 0.0876, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 3.0756895445798587e-05, |
|
"loss": 0.08, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_loss": 0.09377244859933853, |
|
"eval_runtime": 80.6911, |
|
"eval_samples_per_second": 62.919, |
|
"eval_steps_per_second": 31.466, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.0355997434252725e-05, |
|
"loss": 0.091, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 2.9955099422706867e-05, |
|
"loss": 0.0778, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_loss": 0.09943201392889023, |
|
"eval_runtime": 80.4527, |
|
"eval_samples_per_second": 63.105, |
|
"eval_steps_per_second": 31.559, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 2.9554201411161002e-05, |
|
"loss": 0.0704, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 2.915330339961514e-05, |
|
"loss": 0.0699, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 0.09294962137937546, |
|
"eval_runtime": 80.2201, |
|
"eval_samples_per_second": 63.288, |
|
"eval_steps_per_second": 31.65, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.8752405388069276e-05, |
|
"loss": 0.079, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.835150737652341e-05, |
|
"loss": 0.0754, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 0.09366082400083542, |
|
"eval_runtime": 80.2272, |
|
"eval_samples_per_second": 63.283, |
|
"eval_steps_per_second": 31.648, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 2.7950609364977553e-05, |
|
"loss": 0.0707, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.754971135343169e-05, |
|
"loss": 0.0727, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_loss": 0.09205422550439835, |
|
"eval_runtime": 80.3418, |
|
"eval_samples_per_second": 63.193, |
|
"eval_steps_per_second": 31.602, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 2.7148813341885827e-05, |
|
"loss": 0.0723, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.6747915330339962e-05, |
|
"loss": 0.0718, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 0.09299702942371368, |
|
"eval_runtime": 80.7661, |
|
"eval_samples_per_second": 62.861, |
|
"eval_steps_per_second": 31.436, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.6347017318794097e-05, |
|
"loss": 0.0725, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 2.594611930724824e-05, |
|
"loss": 0.0741, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 0.09144437313079834, |
|
"eval_runtime": 80.7469, |
|
"eval_samples_per_second": 62.875, |
|
"eval_steps_per_second": 31.444, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 2.5545221295702375e-05, |
|
"loss": 0.0708, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.5144323284156513e-05, |
|
"loss": 0.0687, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"eval_loss": 0.0920889675617218, |
|
"eval_runtime": 80.5972, |
|
"eval_samples_per_second": 62.992, |
|
"eval_steps_per_second": 31.502, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.474342527261065e-05, |
|
"loss": 0.0677, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 2.4342527261064787e-05, |
|
"loss": 0.0617, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 0.09358620643615723, |
|
"eval_runtime": 80.47, |
|
"eval_samples_per_second": 63.092, |
|
"eval_steps_per_second": 31.552, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 2.3941629249518922e-05, |
|
"loss": 0.0591, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 2.354073123797306e-05, |
|
"loss": 0.0609, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_loss": 0.09032619744539261, |
|
"eval_runtime": 80.985, |
|
"eval_samples_per_second": 62.691, |
|
"eval_steps_per_second": 31.351, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 2.31398332264272e-05, |
|
"loss": 0.0657, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 2.2738935214881335e-05, |
|
"loss": 0.0652, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"eval_loss": 0.0909246951341629, |
|
"eval_runtime": 81.1024, |
|
"eval_samples_per_second": 62.6, |
|
"eval_steps_per_second": 31.306, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 2.233803720333547e-05, |
|
"loss": 0.0629, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 2.1937139191789612e-05, |
|
"loss": 0.0641, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 0.09035832434892654, |
|
"eval_runtime": 80.259, |
|
"eval_samples_per_second": 63.258, |
|
"eval_steps_per_second": 31.635, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.1536241180243747e-05, |
|
"loss": 0.0642, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 2.1135343168697885e-05, |
|
"loss": 0.06, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 0.09021405130624771, |
|
"eval_runtime": 80.1923, |
|
"eval_samples_per_second": 63.31, |
|
"eval_steps_per_second": 31.661, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 2.073444515715202e-05, |
|
"loss": 0.063, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 2.033354714560616e-05, |
|
"loss": 0.0643, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 0.0934990718960762, |
|
"eval_runtime": 79.9682, |
|
"eval_samples_per_second": 63.488, |
|
"eval_steps_per_second": 31.75, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 1.9932649134060298e-05, |
|
"loss": 0.0641, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 1.9531751122514433e-05, |
|
"loss": 0.0552, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_loss": 0.09127607196569443, |
|
"eval_runtime": 81.2206, |
|
"eval_samples_per_second": 62.509, |
|
"eval_steps_per_second": 31.261, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 1.913085311096857e-05, |
|
"loss": 0.0586, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1.8729955099422707e-05, |
|
"loss": 0.0517, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_loss": 0.09465406835079193, |
|
"eval_runtime": 80.2376, |
|
"eval_samples_per_second": 63.275, |
|
"eval_steps_per_second": 31.644, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 1.8329057087876845e-05, |
|
"loss": 0.055, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 1.7928159076330984e-05, |
|
"loss": 0.0592, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_loss": 0.08911187946796417, |
|
"eval_runtime": 80.0512, |
|
"eval_samples_per_second": 63.422, |
|
"eval_steps_per_second": 31.717, |
|
"step": 40000 |
|
} |
|
], |
|
"max_steps": 62360, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.9182220110515405e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|