|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 72.99270072992701, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.875e-05, |
|
"loss": 10.1798, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_test_accuracy": 0.0, |
|
"eval_test_loss": 3.4965455532073975, |
|
"eval_test_runtime": 7.5045, |
|
"eval_test_samples_per_second": 162.702, |
|
"eval_test_steps_per_second": 2.665, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.75e-05, |
|
"loss": 4.0133, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_test_accuracy": 0.085995085995086, |
|
"eval_test_loss": 2.1827144622802734, |
|
"eval_test_runtime": 11.227, |
|
"eval_test_samples_per_second": 108.755, |
|
"eval_test_steps_per_second": 1.781, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 4.6250000000000006e-05, |
|
"loss": 2.1831, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_test_accuracy": 0.28992628992628994, |
|
"eval_test_loss": 1.1154128313064575, |
|
"eval_test_runtime": 4.9154, |
|
"eval_test_samples_per_second": 248.402, |
|
"eval_test_steps_per_second": 4.069, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.178, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"eval_test_accuracy": 0.4430794430794431, |
|
"eval_test_loss": 0.7581946849822998, |
|
"eval_test_runtime": 5.2157, |
|
"eval_test_samples_per_second": 234.1, |
|
"eval_test_steps_per_second": 3.835, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.8603, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"eval_test_accuracy": 0.506961506961507, |
|
"eval_test_loss": 0.6857301592826843, |
|
"eval_test_runtime": 4.1605, |
|
"eval_test_samples_per_second": 293.474, |
|
"eval_test_steps_per_second": 4.807, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.7179, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"eval_test_accuracy": 0.5356265356265356, |
|
"eval_test_loss": 0.629558801651001, |
|
"eval_test_runtime": 4.1543, |
|
"eval_test_samples_per_second": 293.909, |
|
"eval_test_steps_per_second": 4.814, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 4.125e-05, |
|
"loss": 0.6347, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"eval_test_accuracy": 0.556920556920557, |
|
"eval_test_loss": 0.6828880310058594, |
|
"eval_test_runtime": 4.1527, |
|
"eval_test_samples_per_second": 294.026, |
|
"eval_test_steps_per_second": 4.816, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5714, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"eval_test_accuracy": 0.5683865683865684, |
|
"eval_test_loss": 0.6402557492256165, |
|
"eval_test_runtime": 4.1126, |
|
"eval_test_samples_per_second": 296.89, |
|
"eval_test_steps_per_second": 4.863, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"learning_rate": 3.875e-05, |
|
"loss": 0.535, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"eval_test_accuracy": 0.5823095823095823, |
|
"eval_test_loss": 0.6427932381629944, |
|
"eval_test_runtime": 4.1425, |
|
"eval_test_samples_per_second": 294.751, |
|
"eval_test_steps_per_second": 4.828, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4864, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.25, |
|
"eval_test_accuracy": 0.5749385749385749, |
|
"eval_test_loss": 0.6692995429039001, |
|
"eval_test_runtime": 4.1218, |
|
"eval_test_samples_per_second": 296.233, |
|
"eval_test_steps_per_second": 4.852, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.07, |
|
"learning_rate": 3.625e-05, |
|
"loss": 0.4523, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 20.07, |
|
"eval_test_accuracy": 0.588042588042588, |
|
"eval_test_loss": 0.6854296326637268, |
|
"eval_test_runtime": 4.1256, |
|
"eval_test_samples_per_second": 295.954, |
|
"eval_test_steps_per_second": 4.848, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4267, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"eval_test_accuracy": 0.5847665847665847, |
|
"eval_test_loss": 0.6832742691040039, |
|
"eval_test_runtime": 4.114, |
|
"eval_test_samples_per_second": 296.79, |
|
"eval_test_steps_per_second": 4.861, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"learning_rate": 3.375000000000001e-05, |
|
"loss": 0.4017, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 23.72, |
|
"eval_test_accuracy": 0.5864045864045864, |
|
"eval_test_loss": 0.7026733756065369, |
|
"eval_test_runtime": 4.162, |
|
"eval_test_samples_per_second": 293.366, |
|
"eval_test_steps_per_second": 4.805, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 25.55, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3737, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 25.55, |
|
"eval_test_accuracy": 0.5823095823095823, |
|
"eval_test_loss": 0.7358095645904541, |
|
"eval_test_runtime": 4.1139, |
|
"eval_test_samples_per_second": 296.797, |
|
"eval_test_steps_per_second": 4.862, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.3567, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"eval_test_accuracy": 0.583947583947584, |
|
"eval_test_loss": 0.7573221921920776, |
|
"eval_test_runtime": 4.1462, |
|
"eval_test_samples_per_second": 294.489, |
|
"eval_test_steps_per_second": 4.824, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3329, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"eval_test_accuracy": 0.5831285831285832, |
|
"eval_test_loss": 0.7671645283699036, |
|
"eval_test_runtime": 4.1876, |
|
"eval_test_samples_per_second": 291.577, |
|
"eval_test_steps_per_second": 4.776, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"learning_rate": 2.8749999999999997e-05, |
|
"loss": 0.3178, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 31.02, |
|
"eval_test_accuracy": 0.5937755937755937, |
|
"eval_test_loss": 0.8280954360961914, |
|
"eval_test_runtime": 4.1401, |
|
"eval_test_samples_per_second": 294.919, |
|
"eval_test_steps_per_second": 4.831, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 32.85, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3031, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 32.85, |
|
"eval_test_accuracy": 0.5954135954135954, |
|
"eval_test_loss": 0.8298905491828918, |
|
"eval_test_runtime": 4.1172, |
|
"eval_test_samples_per_second": 296.557, |
|
"eval_test_steps_per_second": 4.858, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"learning_rate": 2.625e-05, |
|
"loss": 0.2942, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"eval_test_accuracy": 0.592956592956593, |
|
"eval_test_loss": 0.8406508564949036, |
|
"eval_test_runtime": 4.1428, |
|
"eval_test_samples_per_second": 294.726, |
|
"eval_test_steps_per_second": 4.828, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 36.5, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2794, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 36.5, |
|
"eval_test_accuracy": 0.6003276003276004, |
|
"eval_test_loss": 0.8442530035972595, |
|
"eval_test_runtime": 4.3235, |
|
"eval_test_samples_per_second": 282.409, |
|
"eval_test_steps_per_second": 4.626, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"learning_rate": 2.375e-05, |
|
"loss": 0.2733, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"eval_test_accuracy": 0.6052416052416052, |
|
"eval_test_loss": 0.8638033270835876, |
|
"eval_test_runtime": 4.1266, |
|
"eval_test_samples_per_second": 295.887, |
|
"eval_test_steps_per_second": 4.847, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 40.15, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2631, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 40.15, |
|
"eval_test_accuracy": 0.5888615888615889, |
|
"eval_test_loss": 0.890779435634613, |
|
"eval_test_runtime": 4.1284, |
|
"eval_test_samples_per_second": 295.759, |
|
"eval_test_steps_per_second": 4.845, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"learning_rate": 2.125e-05, |
|
"loss": 0.2574, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"eval_test_accuracy": 0.588042588042588, |
|
"eval_test_loss": 0.9194920063018799, |
|
"eval_test_runtime": 4.2329, |
|
"eval_test_samples_per_second": 288.451, |
|
"eval_test_steps_per_second": 4.725, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 43.8, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2445, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 43.8, |
|
"eval_test_accuracy": 0.5913185913185913, |
|
"eval_test_loss": 0.9236257672309875, |
|
"eval_test_runtime": 4.1684, |
|
"eval_test_samples_per_second": 292.916, |
|
"eval_test_steps_per_second": 4.798, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 45.62, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.2417, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 45.62, |
|
"eval_test_accuracy": 0.5913185913185913, |
|
"eval_test_loss": 0.9303093552589417, |
|
"eval_test_runtime": 4.1896, |
|
"eval_test_samples_per_second": 291.435, |
|
"eval_test_steps_per_second": 4.774, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2316, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 47.45, |
|
"eval_test_accuracy": 0.6060606060606061, |
|
"eval_test_loss": 0.9456475377082825, |
|
"eval_test_runtime": 4.1609, |
|
"eval_test_samples_per_second": 293.446, |
|
"eval_test_steps_per_second": 4.807, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 49.27, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 0.227, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 49.27, |
|
"eval_test_accuracy": 0.5978705978705978, |
|
"eval_test_loss": 0.9745798110961914, |
|
"eval_test_runtime": 4.1394, |
|
"eval_test_samples_per_second": 294.971, |
|
"eval_test_steps_per_second": 4.832, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 51.09, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2241, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 51.09, |
|
"eval_test_accuracy": 0.6052416052416052, |
|
"eval_test_loss": 0.938654899597168, |
|
"eval_test_runtime": 4.1652, |
|
"eval_test_samples_per_second": 293.143, |
|
"eval_test_steps_per_second": 4.802, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 52.92, |
|
"learning_rate": 1.3750000000000002e-05, |
|
"loss": 0.2174, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 52.92, |
|
"eval_test_accuracy": 0.5986895986895987, |
|
"eval_test_loss": 0.9762380719184875, |
|
"eval_test_runtime": 4.2021, |
|
"eval_test_samples_per_second": 290.57, |
|
"eval_test_steps_per_second": 4.76, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 54.74, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.212, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 54.74, |
|
"eval_test_accuracy": 0.601965601965602, |
|
"eval_test_loss": 0.9834132194519043, |
|
"eval_test_runtime": 4.1906, |
|
"eval_test_samples_per_second": 291.369, |
|
"eval_test_steps_per_second": 4.773, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 56.57, |
|
"learning_rate": 1.125e-05, |
|
"loss": 0.206, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 56.57, |
|
"eval_test_accuracy": 0.5995085995085995, |
|
"eval_test_loss": 0.9860948920249939, |
|
"eval_test_runtime": 4.1715, |
|
"eval_test_samples_per_second": 292.702, |
|
"eval_test_steps_per_second": 4.794, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 58.39, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2057, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 58.39, |
|
"eval_test_accuracy": 0.5962325962325963, |
|
"eval_test_loss": 1.0094884634017944, |
|
"eval_test_runtime": 4.2216, |
|
"eval_test_samples_per_second": 289.23, |
|
"eval_test_steps_per_second": 4.738, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 60.22, |
|
"learning_rate": 8.75e-06, |
|
"loss": 0.2023, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 60.22, |
|
"eval_test_accuracy": 0.597051597051597, |
|
"eval_test_loss": 1.000124216079712, |
|
"eval_test_runtime": 4.1702, |
|
"eval_test_samples_per_second": 292.793, |
|
"eval_test_steps_per_second": 4.796, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 62.04, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1994, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 62.04, |
|
"eval_test_accuracy": 0.5995085995085995, |
|
"eval_test_loss": 1.0179657936096191, |
|
"eval_test_runtime": 4.1982, |
|
"eval_test_samples_per_second": 290.842, |
|
"eval_test_steps_per_second": 4.764, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 63.87, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.1967, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 63.87, |
|
"eval_test_accuracy": 0.6044226044226044, |
|
"eval_test_loss": 1.0143113136291504, |
|
"eval_test_runtime": 4.1544, |
|
"eval_test_samples_per_second": 293.907, |
|
"eval_test_steps_per_second": 4.814, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 65.69, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1915, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 65.69, |
|
"eval_test_accuracy": 0.6011466011466011, |
|
"eval_test_loss": 1.0377224683761597, |
|
"eval_test_runtime": 4.1791, |
|
"eval_test_samples_per_second": 292.165, |
|
"eval_test_steps_per_second": 4.786, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 67.52, |
|
"learning_rate": 3.75e-06, |
|
"loss": 0.1934, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 67.52, |
|
"eval_test_accuracy": 0.601965601965602, |
|
"eval_test_loss": 1.02960205078125, |
|
"eval_test_runtime": 4.2049, |
|
"eval_test_samples_per_second": 290.372, |
|
"eval_test_steps_per_second": 4.756, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 69.34, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1932, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 69.34, |
|
"eval_test_accuracy": 0.601965601965602, |
|
"eval_test_loss": 1.0294890403747559, |
|
"eval_test_runtime": 4.1796, |
|
"eval_test_samples_per_second": 292.135, |
|
"eval_test_steps_per_second": 4.785, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 71.17, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.1898, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 71.17, |
|
"eval_test_accuracy": 0.6011466011466011, |
|
"eval_test_loss": 1.0313055515289307, |
|
"eval_test_runtime": 4.1318, |
|
"eval_test_samples_per_second": 295.51, |
|
"eval_test_steps_per_second": 4.84, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.1916, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_test_accuracy": 0.6011466011466011, |
|
"eval_test_loss": 1.0304898023605347, |
|
"eval_test_runtime": 4.1756, |
|
"eval_test_samples_per_second": 292.415, |
|
"eval_test_steps_per_second": 4.79, |
|
"step": 10000 |
|
} |
|
], |
|
"max_steps": 10000, |
|
"num_train_epochs": 73, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|