|
{ |
|
"best_metric": 0.8297674418604651, |
|
"best_model_checkpoint": "resnet-18-finetuned-resnet-18-1/checkpoint-450", |
|
"epoch": 49.95238095238095, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1e-05, |
|
"loss": 5.5547, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.004186046511627907, |
|
"eval_loss": 5.4745025634765625, |
|
"eval_runtime": 6.1733, |
|
"eval_samples_per_second": 348.276, |
|
"eval_steps_per_second": 0.81, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2e-05, |
|
"loss": 5.668, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_accuracy": 0.019069767441860466, |
|
"eval_loss": 5.200634479522705, |
|
"eval_runtime": 5.9989, |
|
"eval_samples_per_second": 358.401, |
|
"eval_steps_per_second": 0.833, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3e-05, |
|
"loss": 5.3055, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_accuracy": 0.09023255813953489, |
|
"eval_loss": 4.731231212615967, |
|
"eval_runtime": 5.9716, |
|
"eval_samples_per_second": 360.039, |
|
"eval_steps_per_second": 0.837, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4e-05, |
|
"loss": 4.7641, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_accuracy": 0.21813953488372093, |
|
"eval_loss": 4.053351879119873, |
|
"eval_runtime": 5.9761, |
|
"eval_samples_per_second": 359.764, |
|
"eval_steps_per_second": 0.837, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 5e-05, |
|
"loss": 4.0761, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_accuracy": 0.34325581395348836, |
|
"eval_loss": 3.1956703662872314, |
|
"eval_runtime": 6.1144, |
|
"eval_samples_per_second": 351.631, |
|
"eval_steps_per_second": 0.818, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 3.3846, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_accuracy": 0.4786046511627907, |
|
"eval_loss": 2.493035078048706, |
|
"eval_runtime": 5.9735, |
|
"eval_samples_per_second": 359.926, |
|
"eval_steps_per_second": 0.837, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 4.7777777777777784e-05, |
|
"loss": 2.8319, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_accuracy": 0.5623255813953488, |
|
"eval_loss": 2.0251505374908447, |
|
"eval_runtime": 5.9942, |
|
"eval_samples_per_second": 358.678, |
|
"eval_steps_per_second": 0.834, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 2.4358, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_accuracy": 0.6334883720930232, |
|
"eval_loss": 1.6915886402130127, |
|
"eval_runtime": 5.9886, |
|
"eval_samples_per_second": 359.016, |
|
"eval_steps_per_second": 0.835, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 2.1433, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_accuracy": 0.6637209302325582, |
|
"eval_loss": 1.456493616104126, |
|
"eval_runtime": 6.0018, |
|
"eval_samples_per_second": 358.223, |
|
"eval_steps_per_second": 0.833, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 1.9315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_accuracy": 0.7013953488372093, |
|
"eval_loss": 1.2676024436950684, |
|
"eval_runtime": 5.9984, |
|
"eval_samples_per_second": 358.426, |
|
"eval_steps_per_second": 0.834, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 1.7746, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_accuracy": 0.7237209302325581, |
|
"eval_loss": 1.1530412435531616, |
|
"eval_runtime": 5.9931, |
|
"eval_samples_per_second": 358.749, |
|
"eval_steps_per_second": 0.834, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 1.6467, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 1.0685006380081177, |
|
"eval_runtime": 6.2755, |
|
"eval_samples_per_second": 342.602, |
|
"eval_steps_per_second": 0.797, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 1.546, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"eval_accuracy": 0.7572093023255814, |
|
"eval_loss": 0.999431312084198, |
|
"eval_runtime": 5.987, |
|
"eval_samples_per_second": 359.114, |
|
"eval_steps_per_second": 0.835, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"learning_rate": 4e-05, |
|
"loss": 1.4734, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 13.95, |
|
"eval_accuracy": 0.7660465116279069, |
|
"eval_loss": 0.946732223033905, |
|
"eval_runtime": 5.9853, |
|
"eval_samples_per_second": 359.213, |
|
"eval_steps_per_second": 0.835, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 1.4163, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"eval_accuracy": 0.7753488372093024, |
|
"eval_loss": 0.9061232209205627, |
|
"eval_runtime": 5.9975, |
|
"eval_samples_per_second": 358.485, |
|
"eval_steps_per_second": 0.834, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 1.3593, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 15.95, |
|
"eval_accuracy": 0.7832558139534884, |
|
"eval_loss": 0.8717327117919922, |
|
"eval_runtime": 5.9972, |
|
"eval_samples_per_second": 358.502, |
|
"eval_steps_per_second": 0.834, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 1.3129, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_accuracy": 0.7902325581395349, |
|
"eval_loss": 0.8438239097595215, |
|
"eval_runtime": 5.9855, |
|
"eval_samples_per_second": 359.203, |
|
"eval_steps_per_second": 0.835, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 1.2843, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"eval_accuracy": 0.7986046511627907, |
|
"eval_loss": 0.8182681798934937, |
|
"eval_runtime": 5.985, |
|
"eval_samples_per_second": 359.23, |
|
"eval_steps_per_second": 0.835, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"learning_rate": 3.444444444444445e-05, |
|
"loss": 1.2527, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"eval_accuracy": 0.804186046511628, |
|
"eval_loss": 0.8000912070274353, |
|
"eval_runtime": 5.9829, |
|
"eval_samples_per_second": 359.357, |
|
"eval_steps_per_second": 0.836, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.2127, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"eval_accuracy": 0.804186046511628, |
|
"eval_loss": 0.7860467433929443, |
|
"eval_runtime": 5.9859, |
|
"eval_samples_per_second": 359.177, |
|
"eval_steps_per_second": 0.835, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 1.1854, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"eval_accuracy": 0.8093023255813954, |
|
"eval_loss": 0.7675830125808716, |
|
"eval_runtime": 6.0054, |
|
"eval_samples_per_second": 358.008, |
|
"eval_steps_per_second": 0.833, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 21.95, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 1.1574, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 21.95, |
|
"eval_accuracy": 0.8079069767441861, |
|
"eval_loss": 0.7555623650550842, |
|
"eval_runtime": 5.9751, |
|
"eval_samples_per_second": 359.829, |
|
"eval_steps_per_second": 0.837, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 3e-05, |
|
"loss": 1.1283, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"eval_accuracy": 0.8130232558139535, |
|
"eval_loss": 0.7396910190582275, |
|
"eval_runtime": 5.9779, |
|
"eval_samples_per_second": 359.658, |
|
"eval_steps_per_second": 0.836, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 1.1302, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"eval_accuracy": 0.8111627906976744, |
|
"eval_loss": 0.7319375276565552, |
|
"eval_runtime": 5.9855, |
|
"eval_samples_per_second": 359.199, |
|
"eval_steps_per_second": 0.835, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 1.1032, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"eval_accuracy": 0.8176744186046512, |
|
"eval_loss": 0.718932569026947, |
|
"eval_runtime": 5.977, |
|
"eval_samples_per_second": 359.71, |
|
"eval_steps_per_second": 0.837, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 1.0891, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"eval_accuracy": 0.82, |
|
"eval_loss": 0.7135240435600281, |
|
"eval_runtime": 5.9814, |
|
"eval_samples_per_second": 359.449, |
|
"eval_steps_per_second": 0.836, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"learning_rate": 2.5555555555555554e-05, |
|
"loss": 1.0738, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"eval_accuracy": 0.8195348837209302, |
|
"eval_loss": 0.7008457779884338, |
|
"eval_runtime": 5.9894, |
|
"eval_samples_per_second": 358.966, |
|
"eval_steps_per_second": 0.835, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 27.95, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 1.0665, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 27.95, |
|
"eval_accuracy": 0.8218604651162791, |
|
"eval_loss": 0.6941251754760742, |
|
"eval_runtime": 5.9997, |
|
"eval_samples_per_second": 358.352, |
|
"eval_steps_per_second": 0.833, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 1.0354, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"eval_accuracy": 0.8223255813953488, |
|
"eval_loss": 0.690199613571167, |
|
"eval_runtime": 5.992, |
|
"eval_samples_per_second": 358.814, |
|
"eval_steps_per_second": 0.834, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 29.95, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.0404, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 29.95, |
|
"eval_accuracy": 0.8237209302325581, |
|
"eval_loss": 0.684846043586731, |
|
"eval_runtime": 5.9904, |
|
"eval_samples_per_second": 358.908, |
|
"eval_steps_per_second": 0.835, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 30.95, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 1.0251, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 30.95, |
|
"eval_accuracy": 0.8218604651162791, |
|
"eval_loss": 0.678667426109314, |
|
"eval_runtime": 5.9867, |
|
"eval_samples_per_second": 359.129, |
|
"eval_steps_per_second": 0.835, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 31.95, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0127, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 31.95, |
|
"eval_accuracy": 0.8246511627906977, |
|
"eval_loss": 0.673900306224823, |
|
"eval_runtime": 5.9929, |
|
"eval_samples_per_second": 358.756, |
|
"eval_steps_per_second": 0.834, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 1.0023, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 32.95, |
|
"eval_accuracy": 0.8255813953488372, |
|
"eval_loss": 0.6712960004806519, |
|
"eval_runtime": 5.9994, |
|
"eval_samples_per_second": 358.368, |
|
"eval_steps_per_second": 0.833, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 33.95, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 1.0012, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 33.95, |
|
"eval_accuracy": 0.8246511627906977, |
|
"eval_loss": 0.6670580506324768, |
|
"eval_runtime": 6.0038, |
|
"eval_samples_per_second": 358.108, |
|
"eval_steps_per_second": 0.833, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 34.95, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.9835, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 34.95, |
|
"eval_accuracy": 0.8251162790697675, |
|
"eval_loss": 0.6612110733985901, |
|
"eval_runtime": 5.9862, |
|
"eval_samples_per_second": 359.158, |
|
"eval_steps_per_second": 0.835, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 35.95, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.982, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 35.95, |
|
"eval_accuracy": 0.8251162790697675, |
|
"eval_loss": 0.6587132215499878, |
|
"eval_runtime": 6.0051, |
|
"eval_samples_per_second": 358.031, |
|
"eval_steps_per_second": 0.833, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 36.95, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 0.9849, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 36.95, |
|
"eval_accuracy": 0.8251162790697675, |
|
"eval_loss": 0.6563166975975037, |
|
"eval_runtime": 6.0073, |
|
"eval_samples_per_second": 357.899, |
|
"eval_steps_per_second": 0.832, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 37.95, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.9645, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 37.95, |
|
"eval_accuracy": 0.8232558139534883, |
|
"eval_loss": 0.652894914150238, |
|
"eval_runtime": 6.0024, |
|
"eval_samples_per_second": 358.193, |
|
"eval_steps_per_second": 0.833, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"learning_rate": 1.2222222222222222e-05, |
|
"loss": 0.947, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"eval_accuracy": 0.8283720930232558, |
|
"eval_loss": 0.6512119770050049, |
|
"eval_runtime": 5.9854, |
|
"eval_samples_per_second": 359.208, |
|
"eval_steps_per_second": 0.835, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.9563, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"eval_accuracy": 0.8265116279069767, |
|
"eval_loss": 0.6485108733177185, |
|
"eval_runtime": 5.9994, |
|
"eval_samples_per_second": 358.371, |
|
"eval_steps_per_second": 0.833, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 40.95, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9619, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 40.95, |
|
"eval_accuracy": 0.826046511627907, |
|
"eval_loss": 0.6457317471504211, |
|
"eval_runtime": 5.9978, |
|
"eval_samples_per_second": 358.466, |
|
"eval_steps_per_second": 0.834, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 41.95, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 0.9399, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 41.95, |
|
"eval_accuracy": 0.8283720930232558, |
|
"eval_loss": 0.6446535587310791, |
|
"eval_runtime": 5.9698, |
|
"eval_samples_per_second": 360.144, |
|
"eval_steps_per_second": 0.838, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 42.95, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 0.9423, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 42.95, |
|
"eval_accuracy": 0.8288372093023256, |
|
"eval_loss": 0.6421455144882202, |
|
"eval_runtime": 5.9718, |
|
"eval_samples_per_second": 360.027, |
|
"eval_steps_per_second": 0.837, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 43.95, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.9482, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 43.95, |
|
"eval_accuracy": 0.8283720930232558, |
|
"eval_loss": 0.6426512002944946, |
|
"eval_runtime": 6.012, |
|
"eval_samples_per_second": 357.619, |
|
"eval_steps_per_second": 0.832, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 44.95, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.9315, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 44.95, |
|
"eval_accuracy": 0.8297674418604651, |
|
"eval_loss": 0.6420783400535583, |
|
"eval_runtime": 6.2892, |
|
"eval_samples_per_second": 341.856, |
|
"eval_steps_per_second": 0.795, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 45.95, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 0.9411, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 45.95, |
|
"eval_accuracy": 0.8293023255813954, |
|
"eval_loss": 0.6401400566101074, |
|
"eval_runtime": 5.9778, |
|
"eval_samples_per_second": 359.664, |
|
"eval_steps_per_second": 0.836, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 46.95, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.9249, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 46.95, |
|
"eval_accuracy": 0.8297674418604651, |
|
"eval_loss": 0.639735758304596, |
|
"eval_runtime": 5.9937, |
|
"eval_samples_per_second": 358.711, |
|
"eval_steps_per_second": 0.834, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 47.95, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 0.9361, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 47.95, |
|
"eval_accuracy": 0.8293023255813954, |
|
"eval_loss": 0.6406731605529785, |
|
"eval_runtime": 5.9924, |
|
"eval_samples_per_second": 358.789, |
|
"eval_steps_per_second": 0.834, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"learning_rate": 1.1111111111111112e-06, |
|
"loss": 0.952, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"eval_accuracy": 0.8297674418604651, |
|
"eval_loss": 0.6389557719230652, |
|
"eval_runtime": 5.9868, |
|
"eval_samples_per_second": 359.122, |
|
"eval_steps_per_second": 0.835, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 49.95, |
|
"learning_rate": 0.0, |
|
"loss": 0.9358, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 49.95, |
|
"eval_accuracy": 0.8297674418604651, |
|
"eval_loss": 0.6392757296562195, |
|
"eval_runtime": 6.0005, |
|
"eval_samples_per_second": 358.304, |
|
"eval_steps_per_second": 0.833, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 49.95, |
|
"step": 500, |
|
"total_flos": 1.0940562332139848e+19, |
|
"train_loss": 1.65314315032959, |
|
"train_runtime": 3746.3389, |
|
"train_samples_per_second": 286.947, |
|
"train_steps_per_second": 0.133 |
|
} |
|
], |
|
"max_steps": 500, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.0940562332139848e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|