|
{ |
|
"best_metric": 0.7241379310344828, |
|
"best_model_checkpoint": "swinv2-base-patch4-window8-256/checkpoint-28", |
|
"epoch": 29.734513274336283, |
|
"eval_steps": 500, |
|
"global_step": 840, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.35398230088495575, |
|
"grad_norm": 9.937736511230469, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.6341, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.7079646017699115, |
|
"grad_norm": 7.19003438949585, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.5428, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9911504424778761, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6211036443710327, |
|
"eval_runtime": 1.0465, |
|
"eval_samples_per_second": 27.71, |
|
"eval_steps_per_second": 27.71, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.0619469026548674, |
|
"grad_norm": 9.382635116577148, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.5783, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.415929203539823, |
|
"grad_norm": 6.914736270904541, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.5546, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.7699115044247788, |
|
"grad_norm": 5.816603183746338, |
|
"learning_rate": 5.9523809523809524e-05, |
|
"loss": 0.6494, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.9823008849557522, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6130104064941406, |
|
"eval_runtime": 1.0448, |
|
"eval_samples_per_second": 27.757, |
|
"eval_steps_per_second": 27.757, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.1238938053097347, |
|
"grad_norm": 1.15414559841156, |
|
"learning_rate": 7.142857142857143e-05, |
|
"loss": 0.6304, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.47787610619469, |
|
"grad_norm": 3.671969175338745, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.6236, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.831858407079646, |
|
"grad_norm": 7.640573501586914, |
|
"learning_rate": 9.523809523809524e-05, |
|
"loss": 0.5752, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.9734513274336285, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6845608949661255, |
|
"eval_runtime": 1.0508, |
|
"eval_samples_per_second": 27.597, |
|
"eval_steps_per_second": 27.597, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 3.185840707964602, |
|
"grad_norm": 1.3297613859176636, |
|
"learning_rate": 9.920634920634922e-05, |
|
"loss": 0.6231, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.5398230088495577, |
|
"grad_norm": 1.8182591199874878, |
|
"learning_rate": 9.78835978835979e-05, |
|
"loss": 0.6603, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.893805309734513, |
|
"grad_norm": 0.6220849752426147, |
|
"learning_rate": 9.656084656084657e-05, |
|
"loss": 0.7165, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.964207112789154, |
|
"eval_runtime": 2.4229, |
|
"eval_samples_per_second": 11.969, |
|
"eval_steps_per_second": 11.969, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 4.247787610619469, |
|
"grad_norm": 0.8275606036186218, |
|
"learning_rate": 9.523809523809524e-05, |
|
"loss": 0.6544, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 4.601769911504425, |
|
"grad_norm": 5.596276760101318, |
|
"learning_rate": 9.391534391534393e-05, |
|
"loss": 0.6842, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.95575221238938, |
|
"grad_norm": 0.6999258995056152, |
|
"learning_rate": 9.25925925925926e-05, |
|
"loss": 0.5699, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.991150442477876, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6072261929512024, |
|
"eval_runtime": 1.0484, |
|
"eval_samples_per_second": 27.662, |
|
"eval_steps_per_second": 27.662, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 5.3097345132743365, |
|
"grad_norm": 5.042980194091797, |
|
"learning_rate": 9.126984126984128e-05, |
|
"loss": 0.6453, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 5.663716814159292, |
|
"grad_norm": 2.5722317695617676, |
|
"learning_rate": 8.994708994708995e-05, |
|
"loss": 0.5517, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 5.982300884955752, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6231208443641663, |
|
"eval_runtime": 1.0298, |
|
"eval_samples_per_second": 28.161, |
|
"eval_steps_per_second": 28.161, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 6.017699115044247, |
|
"grad_norm": 0.5366746783256531, |
|
"learning_rate": 8.862433862433864e-05, |
|
"loss": 0.5733, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 6.371681415929204, |
|
"grad_norm": 0.9455431699752808, |
|
"learning_rate": 8.730158730158731e-05, |
|
"loss": 0.6246, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 6.725663716814159, |
|
"grad_norm": 1.3559240102767944, |
|
"learning_rate": 8.597883597883598e-05, |
|
"loss": 0.5268, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 6.9734513274336285, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6098366379737854, |
|
"eval_runtime": 1.0741, |
|
"eval_samples_per_second": 27.0, |
|
"eval_steps_per_second": 27.0, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 7.079646017699115, |
|
"grad_norm": 0.570971667766571, |
|
"learning_rate": 8.465608465608466e-05, |
|
"loss": 0.5397, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 7.433628318584071, |
|
"grad_norm": 1.1077288389205933, |
|
"learning_rate": 8.333333333333334e-05, |
|
"loss": 0.4539, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 7.787610619469026, |
|
"grad_norm": 1.5305235385894775, |
|
"learning_rate": 8.201058201058202e-05, |
|
"loss": 0.672, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5890592932701111, |
|
"eval_runtime": 1.1176, |
|
"eval_samples_per_second": 25.948, |
|
"eval_steps_per_second": 25.948, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 8.141592920353983, |
|
"grad_norm": 1.5520473718643188, |
|
"learning_rate": 8.068783068783069e-05, |
|
"loss": 0.5567, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 8.495575221238939, |
|
"grad_norm": 1.5060292482376099, |
|
"learning_rate": 7.936507936507937e-05, |
|
"loss": 0.6923, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 8.849557522123893, |
|
"grad_norm": 0.2911463677883148, |
|
"learning_rate": 7.804232804232805e-05, |
|
"loss": 0.5448, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 8.991150442477876, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6022756695747375, |
|
"eval_runtime": 1.0313, |
|
"eval_samples_per_second": 28.12, |
|
"eval_steps_per_second": 28.12, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 9.20353982300885, |
|
"grad_norm": 0.38871487975120544, |
|
"learning_rate": 7.671957671957673e-05, |
|
"loss": 0.4555, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 9.557522123893806, |
|
"grad_norm": 1.8789817094802856, |
|
"learning_rate": 7.53968253968254e-05, |
|
"loss": 0.7061, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 9.91150442477876, |
|
"grad_norm": 1.5270930528640747, |
|
"learning_rate": 7.407407407407407e-05, |
|
"loss": 0.555, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 9.982300884955752, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5916692614555359, |
|
"eval_runtime": 1.0352, |
|
"eval_samples_per_second": 28.013, |
|
"eval_steps_per_second": 28.013, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 10.265486725663717, |
|
"grad_norm": 1.4813481569290161, |
|
"learning_rate": 7.275132275132276e-05, |
|
"loss": 0.5704, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 10.619469026548673, |
|
"grad_norm": 1.4199880361557007, |
|
"learning_rate": 7.142857142857143e-05, |
|
"loss": 0.5333, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 10.973451327433628, |
|
"grad_norm": 1.5007575750350952, |
|
"learning_rate": 7.010582010582011e-05, |
|
"loss": 0.5818, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 10.973451327433628, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5940178632736206, |
|
"eval_runtime": 1.0438, |
|
"eval_samples_per_second": 27.784, |
|
"eval_steps_per_second": 27.784, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 11.327433628318584, |
|
"grad_norm": 0.39495939016342163, |
|
"learning_rate": 6.878306878306878e-05, |
|
"loss": 0.5062, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 11.68141592920354, |
|
"grad_norm": 0.27960801124572754, |
|
"learning_rate": 6.746031746031747e-05, |
|
"loss": 0.6556, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5965593457221985, |
|
"eval_runtime": 1.0472, |
|
"eval_samples_per_second": 27.693, |
|
"eval_steps_per_second": 27.693, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 12.035398230088495, |
|
"grad_norm": 1.464916467666626, |
|
"learning_rate": 6.613756613756614e-05, |
|
"loss": 0.4407, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 12.389380530973451, |
|
"grad_norm": 0.8306865096092224, |
|
"learning_rate": 6.481481481481482e-05, |
|
"loss": 0.5427, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 12.743362831858407, |
|
"grad_norm": 1.798261284828186, |
|
"learning_rate": 6.349206349206349e-05, |
|
"loss": 0.716, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 12.991150442477876, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5903679132461548, |
|
"eval_runtime": 1.0433, |
|
"eval_samples_per_second": 27.795, |
|
"eval_steps_per_second": 27.795, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 13.097345132743364, |
|
"grad_norm": 0.575097918510437, |
|
"learning_rate": 6.216931216931218e-05, |
|
"loss": 0.4514, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 13.451327433628318, |
|
"grad_norm": 1.393051028251648, |
|
"learning_rate": 6.084656084656085e-05, |
|
"loss": 0.5387, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 13.805309734513274, |
|
"grad_norm": 2.5438356399536133, |
|
"learning_rate": 5.9523809523809524e-05, |
|
"loss": 0.6104, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 13.982300884955752, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5938333868980408, |
|
"eval_runtime": 1.0518, |
|
"eval_samples_per_second": 27.571, |
|
"eval_steps_per_second": 27.571, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 14.15929203539823, |
|
"grad_norm": 1.7508420944213867, |
|
"learning_rate": 5.82010582010582e-05, |
|
"loss": 0.5812, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 14.513274336283185, |
|
"grad_norm": 0.19824433326721191, |
|
"learning_rate": 5.6878306878306885e-05, |
|
"loss": 0.5678, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 14.867256637168142, |
|
"grad_norm": 0.09461919218301773, |
|
"learning_rate": 5.555555555555556e-05, |
|
"loss": 0.5046, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 14.973451327433628, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5921294689178467, |
|
"eval_runtime": 1.0718, |
|
"eval_samples_per_second": 27.057, |
|
"eval_steps_per_second": 27.057, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 15.221238938053098, |
|
"grad_norm": 0.46888694167137146, |
|
"learning_rate": 5.423280423280423e-05, |
|
"loss": 0.5344, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 15.575221238938052, |
|
"grad_norm": 2.1239452362060547, |
|
"learning_rate": 5.291005291005291e-05, |
|
"loss": 0.4711, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 15.929203539823009, |
|
"grad_norm": 2.082629442214966, |
|
"learning_rate": 5.158730158730159e-05, |
|
"loss": 0.5871, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6027411818504333, |
|
"eval_runtime": 1.0332, |
|
"eval_samples_per_second": 28.069, |
|
"eval_steps_per_second": 28.069, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 16.283185840707965, |
|
"grad_norm": 3.216160535812378, |
|
"learning_rate": 5.026455026455027e-05, |
|
"loss": 0.7275, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 16.63716814159292, |
|
"grad_norm": 0.5754966139793396, |
|
"learning_rate": 4.894179894179895e-05, |
|
"loss": 0.503, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 16.991150442477878, |
|
"grad_norm": 1.4574097394943237, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.5222, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 16.991150442477878, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5921464562416077, |
|
"eval_runtime": 1.0314, |
|
"eval_samples_per_second": 28.118, |
|
"eval_steps_per_second": 28.118, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 17.345132743362832, |
|
"grad_norm": 0.36778560280799866, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.5548, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 17.699115044247787, |
|
"grad_norm": 1.3566818237304688, |
|
"learning_rate": 4.4973544973544974e-05, |
|
"loss": 0.5511, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 17.98230088495575, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5947627425193787, |
|
"eval_runtime": 1.0877, |
|
"eval_samples_per_second": 26.661, |
|
"eval_steps_per_second": 26.661, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 18.053097345132745, |
|
"grad_norm": 3.3262369632720947, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 0.6192, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 18.4070796460177, |
|
"grad_norm": 0.4368630051612854, |
|
"learning_rate": 4.232804232804233e-05, |
|
"loss": 0.4436, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 18.761061946902654, |
|
"grad_norm": 0.6899360418319702, |
|
"learning_rate": 4.100529100529101e-05, |
|
"loss": 0.6394, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 18.97345132743363, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5969327688217163, |
|
"eval_runtime": 1.0482, |
|
"eval_samples_per_second": 27.667, |
|
"eval_steps_per_second": 27.667, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 19.115044247787612, |
|
"grad_norm": 0.5160775780677795, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 0.5015, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 19.469026548672566, |
|
"grad_norm": 1.3984384536743164, |
|
"learning_rate": 3.835978835978836e-05, |
|
"loss": 0.6334, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 19.82300884955752, |
|
"grad_norm": 0.24300755560398102, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.566, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6005056500434875, |
|
"eval_runtime": 1.0644, |
|
"eval_samples_per_second": 27.247, |
|
"eval_steps_per_second": 27.247, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 20.17699115044248, |
|
"grad_norm": 1.3175139427185059, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.4341, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 20.530973451327434, |
|
"grad_norm": 1.367430567741394, |
|
"learning_rate": 3.439153439153439e-05, |
|
"loss": 0.5373, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 20.884955752212388, |
|
"grad_norm": 0.3279801905155182, |
|
"learning_rate": 3.306878306878307e-05, |
|
"loss": 0.6032, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 20.991150442477878, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5968325138092041, |
|
"eval_runtime": 1.0696, |
|
"eval_samples_per_second": 27.113, |
|
"eval_steps_per_second": 27.113, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 21.238938053097346, |
|
"grad_norm": 3.318119764328003, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.5679, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 21.5929203539823, |
|
"grad_norm": 1.7342548370361328, |
|
"learning_rate": 3.0423280423280425e-05, |
|
"loss": 0.6877, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 21.946902654867255, |
|
"grad_norm": 0.20895572006702423, |
|
"learning_rate": 2.91005291005291e-05, |
|
"loss": 0.4824, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 21.98230088495575, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5933610200881958, |
|
"eval_runtime": 1.0375, |
|
"eval_samples_per_second": 27.951, |
|
"eval_steps_per_second": 27.951, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 22.300884955752213, |
|
"grad_norm": 0.5702241659164429, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.5076, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 22.654867256637168, |
|
"grad_norm": 1.405441164970398, |
|
"learning_rate": 2.6455026455026456e-05, |
|
"loss": 0.4975, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 22.97345132743363, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5979170799255371, |
|
"eval_runtime": 1.0599, |
|
"eval_samples_per_second": 27.362, |
|
"eval_steps_per_second": 27.362, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 23.008849557522122, |
|
"grad_norm": 1.4034713506698608, |
|
"learning_rate": 2.5132275132275137e-05, |
|
"loss": 0.5977, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 23.36283185840708, |
|
"grad_norm": 1.3884029388427734, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.4975, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 23.716814159292035, |
|
"grad_norm": 0.6450229287147522, |
|
"learning_rate": 2.2486772486772487e-05, |
|
"loss": 0.4976, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.603380560874939, |
|
"eval_runtime": 1.0349, |
|
"eval_samples_per_second": 28.023, |
|
"eval_steps_per_second": 28.023, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 24.07079646017699, |
|
"grad_norm": 0.3577538728713989, |
|
"learning_rate": 2.1164021164021164e-05, |
|
"loss": 0.7906, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 24.424778761061948, |
|
"grad_norm": 0.5209062099456787, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.4288, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 24.778761061946902, |
|
"grad_norm": 1.9989219903945923, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.5355, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 24.991150442477878, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6033223867416382, |
|
"eval_runtime": 1.0457, |
|
"eval_samples_per_second": 27.733, |
|
"eval_steps_per_second": 27.733, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 25.13274336283186, |
|
"grad_norm": 1.9387425184249878, |
|
"learning_rate": 1.7195767195767195e-05, |
|
"loss": 0.6319, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 25.486725663716815, |
|
"grad_norm": 0.4894324541091919, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.5932, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 25.84070796460177, |
|
"grad_norm": 0.26004230976104736, |
|
"learning_rate": 1.455026455026455e-05, |
|
"loss": 0.4323, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 25.98230088495575, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.601476788520813, |
|
"eval_runtime": 1.0397, |
|
"eval_samples_per_second": 27.892, |
|
"eval_steps_per_second": 27.892, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 26.194690265486727, |
|
"grad_norm": 1.3441038131713867, |
|
"learning_rate": 1.3227513227513228e-05, |
|
"loss": 0.6053, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 26.548672566371682, |
|
"grad_norm": 1.3334237337112427, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.5, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 26.902654867256636, |
|
"grad_norm": 0.3579448461532593, |
|
"learning_rate": 1.0582010582010582e-05, |
|
"loss": 0.5579, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 26.97345132743363, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6042701601982117, |
|
"eval_runtime": 1.0854, |
|
"eval_samples_per_second": 26.718, |
|
"eval_steps_per_second": 26.718, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 27.256637168141594, |
|
"grad_norm": 1.311233639717102, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.5794, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 27.61061946902655, |
|
"grad_norm": 1.3221209049224854, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.494, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 27.964601769911503, |
|
"grad_norm": 0.31315815448760986, |
|
"learning_rate": 6.613756613756614e-06, |
|
"loss": 0.5639, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.6023350358009338, |
|
"eval_runtime": 1.0409, |
|
"eval_samples_per_second": 27.86, |
|
"eval_steps_per_second": 27.86, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 28.31858407079646, |
|
"grad_norm": 0.7090272307395935, |
|
"learning_rate": 5.291005291005291e-06, |
|
"loss": 0.5711, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 28.672566371681416, |
|
"grad_norm": 1.377335548400879, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.5595, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 28.991150442477878, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.5995790958404541, |
|
"eval_runtime": 1.0359, |
|
"eval_samples_per_second": 27.995, |
|
"eval_steps_per_second": 27.995, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 29.02654867256637, |
|
"grad_norm": 1.294968605041504, |
|
"learning_rate": 2.6455026455026455e-06, |
|
"loss": 0.5339, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 29.38053097345133, |
|
"grad_norm": 1.297398328781128, |
|
"learning_rate": 1.3227513227513228e-06, |
|
"loss": 0.6959, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 29.734513274336283, |
|
"grad_norm": 1.3724257946014404, |
|
"learning_rate": 0.0, |
|
"loss": 0.4372, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 29.734513274336283, |
|
"eval_accuracy": 0.7241379310344828, |
|
"eval_loss": 0.599529504776001, |
|
"eval_runtime": 1.0824, |
|
"eval_samples_per_second": 26.793, |
|
"eval_steps_per_second": 26.793, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 29.734513274336283, |
|
"step": 840, |
|
"total_flos": 3.444219560381645e+17, |
|
"train_loss": 0.5698859515644256, |
|
"train_runtime": 500.495, |
|
"train_samples_per_second": 6.773, |
|
"train_steps_per_second": 1.678 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 840, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.444219560381645e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|