|
{ |
|
"best_metric": 0.29478973150253296, |
|
"best_model_checkpoint": "./convnext-nano-3e-4-augment/checkpoint-2750", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 2750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 17.436264038085938, |
|
"learning_rate": 0.00029902226030228247, |
|
"loss": 1.4532, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 8.700443267822266, |
|
"learning_rate": 0.00029610178754135, |
|
"loss": 0.8119, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.827037773359841, |
|
"eval_loss": 0.5636539459228516, |
|
"eval_runtime": 61.1609, |
|
"eval_samples_per_second": 41.121, |
|
"eval_steps_per_second": 0.654, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 19.5541934967041, |
|
"learning_rate": 0.0002912766545459287, |
|
"loss": 0.7009, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 5.992691516876221, |
|
"learning_rate": 0.0002846097643037037, |
|
"loss": 0.5634, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 10.371554374694824, |
|
"learning_rate": 0.0002761880299246772, |
|
"loss": 0.5829, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8497017892644135, |
|
"eval_loss": 0.5015920400619507, |
|
"eval_runtime": 60.5306, |
|
"eval_samples_per_second": 41.549, |
|
"eval_steps_per_second": 0.661, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 6.42030143737793, |
|
"learning_rate": 0.00026612124159586237, |
|
"loss": 0.5118, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 5.376116752624512, |
|
"learning_rate": 0.00025454063529829405, |
|
"loss": 0.4771, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 9.462407112121582, |
|
"learning_rate": 0.00024159718194531572, |
|
"loss": 0.4623, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8755467196819086, |
|
"eval_loss": 0.44941774010658264, |
|
"eval_runtime": 60.9271, |
|
"eval_samples_per_second": 41.279, |
|
"eval_steps_per_second": 0.657, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 6.973038196563721, |
|
"learning_rate": 0.00022745961924584428, |
|
"loss": 0.3929, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 6.476861953735352, |
|
"learning_rate": 0.00021231225195028297, |
|
"loss": 0.3711, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 6.22141170501709, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 0.359, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8886679920477137, |
|
"eval_loss": 0.3809404671192169, |
|
"eval_runtime": 61.1213, |
|
"eval_samples_per_second": 41.148, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"grad_norm": 7.65138578414917, |
|
"learning_rate": 0.0001797885699968618, |
|
"loss": 0.3315, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"grad_norm": 4.203142166137695, |
|
"learning_rate": 0.00016283625127182596, |
|
"loss": 0.2881, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8998011928429424, |
|
"eval_loss": 0.3741509020328522, |
|
"eval_runtime": 61.4609, |
|
"eval_samples_per_second": 40.92, |
|
"eval_steps_per_second": 0.651, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"grad_norm": 6.004030227661133, |
|
"learning_rate": 0.00014571659238094556, |
|
"loss": 0.286, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"grad_norm": 6.018474578857422, |
|
"learning_rate": 0.00012865277425900724, |
|
"loss": 0.2292, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"grad_norm": 4.614959716796875, |
|
"learning_rate": 0.00011186724987097698, |
|
"loss": 0.2302, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9113320079522863, |
|
"eval_loss": 0.3401913046836853, |
|
"eval_runtime": 61.1216, |
|
"eval_samples_per_second": 41.147, |
|
"eval_steps_per_second": 0.654, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"grad_norm": 4.227890968322754, |
|
"learning_rate": 9.557884419740386e-05, |
|
"loss": 0.2225, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"grad_norm": 4.184410572052002, |
|
"learning_rate": 7.999990151614894e-05, |
|
"loss": 0.1851, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"grad_norm": 6.824435234069824, |
|
"learning_rate": 6.533351716998465e-05, |
|
"loss": 0.1827, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9121272365805169, |
|
"eval_loss": 0.31500810384750366, |
|
"eval_runtime": 60.793, |
|
"eval_samples_per_second": 41.37, |
|
"eval_steps_per_second": 0.658, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"grad_norm": 3.374882221221924, |
|
"learning_rate": 5.1770889908207245e-05, |
|
"loss": 0.1551, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"grad_norm": 4.39635705947876, |
|
"learning_rate": 3.948882931853924e-05, |
|
"loss": 0.1564, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.6348471641540527, |
|
"learning_rate": 2.8647450843757897e-05, |
|
"loss": 0.1466, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9228628230616303, |
|
"eval_loss": 0.30119746923446655, |
|
"eval_runtime": 61.3308, |
|
"eval_samples_per_second": 41.007, |
|
"eval_steps_per_second": 0.652, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"grad_norm": 5.662447929382324, |
|
"learning_rate": 1.9388088432033443e-05, |
|
"loss": 0.1236, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"grad_norm": 0.9912707209587097, |
|
"learning_rate": 1.1831452032772498e-05, |
|
"loss": 0.1223, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9248508946322067, |
|
"eval_loss": 0.2995615005493164, |
|
"eval_runtime": 60.6772, |
|
"eval_samples_per_second": 41.449, |
|
"eval_steps_per_second": 0.659, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 2.5069386959075928, |
|
"learning_rate": 6.076053957825411e-06, |
|
"loss": 0.1283, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"grad_norm": 7.067609786987305, |
|
"learning_rate": 2.1969246228460523e-06, |
|
"loss": 0.1142, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"grad_norm": 4.694984436035156, |
|
"learning_rate": 2.4463441107965276e-07, |
|
"loss": 0.1332, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9248508946322067, |
|
"eval_loss": 0.29478973150253296, |
|
"eval_runtime": 61.4481, |
|
"eval_samples_per_second": 40.929, |
|
"eval_steps_per_second": 0.651, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 2750, |
|
"total_flos": 7.000491898906214e+18, |
|
"train_loss": 0.35536065743186257, |
|
"train_runtime": 6476.8205, |
|
"train_samples_per_second": 27.144, |
|
"train_steps_per_second": 0.425 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 2750, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 7.000491898906214e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|