|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.666666666666667, |
|
"eval_steps": 10, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9198396793587176e-05, |
|
"loss": 1.4613, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.749655842781067, |
|
"eval_runtime": 9.1057, |
|
"eval_samples_per_second": 0.988, |
|
"eval_steps_per_second": 0.22, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.829659318637275e-05, |
|
"loss": 0.8073, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.7181354761123657, |
|
"eval_runtime": 9.0405, |
|
"eval_samples_per_second": 0.996, |
|
"eval_steps_per_second": 0.221, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.7294589178356715e-05, |
|
"loss": 0.5993, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6926982402801514, |
|
"eval_runtime": 9.0635, |
|
"eval_samples_per_second": 0.993, |
|
"eval_steps_per_second": 0.221, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.629258517034069e-05, |
|
"loss": 0.5535, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.5878939628601074, |
|
"eval_runtime": 9.0858, |
|
"eval_samples_per_second": 0.991, |
|
"eval_steps_per_second": 0.22, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.529058116232465e-05, |
|
"loss": 0.4914, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.6263775825500488, |
|
"eval_runtime": 9.0441, |
|
"eval_samples_per_second": 0.995, |
|
"eval_steps_per_second": 0.221, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.428857715430862e-05, |
|
"loss": 0.3532, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.5376472473144531, |
|
"eval_runtime": 9.0097, |
|
"eval_samples_per_second": 0.999, |
|
"eval_steps_per_second": 0.222, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.3286573146292584e-05, |
|
"loss": 0.3079, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.50021231174469, |
|
"eval_runtime": 9.0392, |
|
"eval_samples_per_second": 0.996, |
|
"eval_steps_per_second": 0.221, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.228456913827655e-05, |
|
"loss": 0.32, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 1.4650906324386597, |
|
"eval_runtime": 9.0603, |
|
"eval_samples_per_second": 0.993, |
|
"eval_steps_per_second": 0.221, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.128256513026052e-05, |
|
"loss": 0.22, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.5176193714141846, |
|
"eval_runtime": 9.0417, |
|
"eval_samples_per_second": 0.995, |
|
"eval_steps_per_second": 0.221, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.0280561122244495e-05, |
|
"loss": 0.2341, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 1.4975649118423462, |
|
"eval_runtime": 9.0642, |
|
"eval_samples_per_second": 0.993, |
|
"eval_steps_per_second": 0.221, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.927855711422846e-05, |
|
"loss": 0.2391, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 1.7257053852081299, |
|
"eval_runtime": 8.9861, |
|
"eval_samples_per_second": 1.002, |
|
"eval_steps_per_second": 0.223, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.8276553106212426e-05, |
|
"loss": 0.1884, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.639228105545044, |
|
"eval_runtime": 9.0813, |
|
"eval_samples_per_second": 0.991, |
|
"eval_steps_per_second": 0.22, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.727454909819639e-05, |
|
"loss": 0.1881, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 1.5887799263000488, |
|
"eval_runtime": 9.08, |
|
"eval_samples_per_second": 0.991, |
|
"eval_steps_per_second": 0.22, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.627254509018036e-05, |
|
"loss": 0.1314, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 1.6493072509765625, |
|
"eval_runtime": 9.0781, |
|
"eval_samples_per_second": 0.991, |
|
"eval_steps_per_second": 0.22, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.527054108216433e-05, |
|
"loss": 0.1524, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.6057649850845337, |
|
"eval_runtime": 9.0159, |
|
"eval_samples_per_second": 0.998, |
|
"eval_steps_per_second": 0.222, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.42685370741483e-05, |
|
"loss": 0.1036, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 1.6697440147399902, |
|
"eval_runtime": 8.9746, |
|
"eval_samples_per_second": 1.003, |
|
"eval_steps_per_second": 0.223, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.326653306613227e-05, |
|
"loss": 0.1176, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 1.6768882274627686, |
|
"eval_runtime": 9.0646, |
|
"eval_samples_per_second": 0.993, |
|
"eval_steps_per_second": 0.221, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.2264529058116233e-05, |
|
"loss": 0.1375, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.617688775062561, |
|
"eval_runtime": 9.0499, |
|
"eval_samples_per_second": 0.994, |
|
"eval_steps_per_second": 0.221, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.12625250501002e-05, |
|
"loss": 0.1038, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 1.6981985569000244, |
|
"eval_runtime": 9.0561, |
|
"eval_samples_per_second": 0.994, |
|
"eval_steps_per_second": 0.221, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.026052104208417e-05, |
|
"loss": 0.1068, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 1.7827256917953491, |
|
"eval_runtime": 9.0294, |
|
"eval_samples_per_second": 0.997, |
|
"eval_steps_per_second": 0.221, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.925851703406814e-05, |
|
"loss": 0.1229, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 1.6968226432800293, |
|
"eval_runtime": 9.039, |
|
"eval_samples_per_second": 0.996, |
|
"eval_steps_per_second": 0.221, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.8256513026052106e-05, |
|
"loss": 0.142, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 1.7085565328598022, |
|
"eval_runtime": 9.0746, |
|
"eval_samples_per_second": 0.992, |
|
"eval_steps_per_second": 0.22, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 2.7254509018036072e-05, |
|
"loss": 0.0868, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 1.814492106437683, |
|
"eval_runtime": 9.0805, |
|
"eval_samples_per_second": 0.991, |
|
"eval_steps_per_second": 0.22, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.625250501002004e-05, |
|
"loss": 0.0772, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 1.916466236114502, |
|
"eval_runtime": 9.072, |
|
"eval_samples_per_second": 0.992, |
|
"eval_steps_per_second": 0.22, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 2.5250501002004006e-05, |
|
"loss": 0.0644, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 1.8792904615402222, |
|
"eval_runtime": 9.0616, |
|
"eval_samples_per_second": 0.993, |
|
"eval_steps_per_second": 0.221, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.4248496993987975e-05, |
|
"loss": 0.0886, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 1.7695975303649902, |
|
"eval_runtime": 9.0109, |
|
"eval_samples_per_second": 0.999, |
|
"eval_steps_per_second": 0.222, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.3246492985971944e-05, |
|
"loss": 0.0807, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 1.7431364059448242, |
|
"eval_runtime": 9.0009, |
|
"eval_samples_per_second": 1.0, |
|
"eval_steps_per_second": 0.222, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.2244488977955913e-05, |
|
"loss": 0.0873, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 1.8270233869552612, |
|
"eval_runtime": 9.0218, |
|
"eval_samples_per_second": 0.998, |
|
"eval_steps_per_second": 0.222, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.124248496993988e-05, |
|
"loss": 0.0704, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 1.8161801099777222, |
|
"eval_runtime": 9.0309, |
|
"eval_samples_per_second": 0.997, |
|
"eval_steps_per_second": 0.221, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.0240480961923848e-05, |
|
"loss": 0.0729, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.8546494245529175, |
|
"eval_runtime": 9.0378, |
|
"eval_samples_per_second": 0.996, |
|
"eval_steps_per_second": 0.221, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.9238476953907817e-05, |
|
"loss": 0.063, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 1.9666298627853394, |
|
"eval_runtime": 9.008, |
|
"eval_samples_per_second": 0.999, |
|
"eval_steps_per_second": 0.222, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.8236472945891783e-05, |
|
"loss": 0.0541, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_loss": 1.9620505571365356, |
|
"eval_runtime": 9.0643, |
|
"eval_samples_per_second": 0.993, |
|
"eval_steps_per_second": 0.221, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.7234468937875752e-05, |
|
"loss": 0.0648, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 1.962950348854065, |
|
"eval_runtime": 9.0685, |
|
"eval_samples_per_second": 0.992, |
|
"eval_steps_per_second": 0.221, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.623246492985972e-05, |
|
"loss": 0.0631, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_loss": 1.9116477966308594, |
|
"eval_runtime": 9.0789, |
|
"eval_samples_per_second": 0.991, |
|
"eval_steps_per_second": 0.22, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.523046092184369e-05, |
|
"loss": 0.0545, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_loss": 1.9107557535171509, |
|
"eval_runtime": 9.0172, |
|
"eval_samples_per_second": 0.998, |
|
"eval_steps_per_second": 0.222, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.4228456913827657e-05, |
|
"loss": 0.0622, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 1.965379238128662, |
|
"eval_runtime": 8.9497, |
|
"eval_samples_per_second": 1.006, |
|
"eval_steps_per_second": 0.223, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.3226452905811623e-05, |
|
"loss": 0.0523, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"eval_loss": 2.0281553268432617, |
|
"eval_runtime": 9.0358, |
|
"eval_samples_per_second": 0.996, |
|
"eval_steps_per_second": 0.221, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 1.2224448897795592e-05, |
|
"loss": 0.0479, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"eval_loss": 2.065614938735962, |
|
"eval_runtime": 9.0421, |
|
"eval_samples_per_second": 0.995, |
|
"eval_steps_per_second": 0.221, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 1.122244488977956e-05, |
|
"loss": 0.0373, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 2.0752174854278564, |
|
"eval_runtime": 9.0688, |
|
"eval_samples_per_second": 0.992, |
|
"eval_steps_per_second": 0.221, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1.0220440881763528e-05, |
|
"loss": 0.0406, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"eval_loss": 2.0857064723968506, |
|
"eval_runtime": 9.0511, |
|
"eval_samples_per_second": 0.994, |
|
"eval_steps_per_second": 0.221, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 9.218436873747496e-06, |
|
"loss": 0.0463, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_loss": 2.118208885192871, |
|
"eval_runtime": 9.0064, |
|
"eval_samples_per_second": 0.999, |
|
"eval_steps_per_second": 0.222, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 8.216432865731463e-06, |
|
"loss": 0.0433, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_loss": 2.1232292652130127, |
|
"eval_runtime": 9.0773, |
|
"eval_samples_per_second": 0.991, |
|
"eval_steps_per_second": 0.22, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 7.214428857715432e-06, |
|
"loss": 0.0425, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.124908924102783, |
|
"eval_runtime": 9.0585, |
|
"eval_samples_per_second": 0.994, |
|
"eval_steps_per_second": 0.221, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 6.212424849699399e-06, |
|
"loss": 0.0413, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_loss": 2.125481605529785, |
|
"eval_runtime": 9.0338, |
|
"eval_samples_per_second": 0.996, |
|
"eval_steps_per_second": 0.221, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 5.2104208416833665e-06, |
|
"loss": 0.0579, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.1075000762939453, |
|
"eval_runtime": 9.0414, |
|
"eval_samples_per_second": 0.995, |
|
"eval_steps_per_second": 0.221, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 4.208416833667335e-06, |
|
"loss": 0.0326, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.112290382385254, |
|
"eval_runtime": 8.9941, |
|
"eval_samples_per_second": 1.001, |
|
"eval_steps_per_second": 0.222, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 3.2064128256513024e-06, |
|
"loss": 0.0378, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.1354782581329346, |
|
"eval_runtime": 9.0542, |
|
"eval_samples_per_second": 0.994, |
|
"eval_steps_per_second": 0.221, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 2.2044088176352706e-06, |
|
"loss": 0.0384, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.1528055667877197, |
|
"eval_runtime": 9.0296, |
|
"eval_samples_per_second": 0.997, |
|
"eval_steps_per_second": 0.221, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.2024048096192386e-06, |
|
"loss": 0.0328, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 2.162899971008301, |
|
"eval_runtime": 9.0663, |
|
"eval_samples_per_second": 0.993, |
|
"eval_steps_per_second": 0.221, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 2.004008016032064e-07, |
|
"loss": 0.0356, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 2.1663544178009033, |
|
"eval_runtime": 9.0766, |
|
"eval_samples_per_second": 0.992, |
|
"eval_steps_per_second": 0.22, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 50, |
|
"total_flos": 3.411755398324224e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|