|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 14.999789127409219, |
|
"global_step": 59265, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 6.994913523145705e-06, |
|
"loss": 0.3404, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.887192844025166, |
|
"eval_f1": 0.886488607488543, |
|
"eval_loss": 0.4329048991203308, |
|
"eval_runtime": 42.8766, |
|
"eval_samples_per_second": 3340.027, |
|
"eval_steps_per_second": 69.595, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 6.954310370186513e-06, |
|
"loss": 0.3433, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.8883170750441662, |
|
"eval_f1": 0.8876090017723502, |
|
"eval_loss": 0.4280129075050354, |
|
"eval_runtime": 46.562, |
|
"eval_samples_per_second": 3075.664, |
|
"eval_steps_per_second": 64.087, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 6.873575782958537e-06, |
|
"loss": 0.3281, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.8889804411733899, |
|
"eval_f1": 0.8882523897529724, |
|
"eval_loss": 0.43023422360420227, |
|
"eval_runtime": 45.3226, |
|
"eval_samples_per_second": 3159.767, |
|
"eval_steps_per_second": 65.839, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.753647718516397e-06, |
|
"loss": 0.331, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.8891340628033154, |
|
"eval_f1": 0.8885291871869458, |
|
"eval_loss": 0.42653217911720276, |
|
"eval_runtime": 54.726, |
|
"eval_samples_per_second": 2616.839, |
|
"eval_steps_per_second": 54.526, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 6.5959194752957715e-06, |
|
"loss": 0.3224, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.8881355222087998, |
|
"eval_f1": 0.8873919365623191, |
|
"eval_loss": 0.43002641201019287, |
|
"eval_runtime": 54.91, |
|
"eval_samples_per_second": 2608.066, |
|
"eval_steps_per_second": 54.343, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 6.402223506072121e-06, |
|
"loss": 0.3361, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_accuracy": 0.8889175959611477, |
|
"eval_f1": 0.8882468792028413, |
|
"eval_loss": 0.42911502718925476, |
|
"eval_runtime": 50.9856, |
|
"eval_samples_per_second": 2808.81, |
|
"eval_steps_per_second": 58.526, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 6.1748101289188055e-06, |
|
"loss": 0.3323, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_accuracy": 0.8877584509353462, |
|
"eval_f1": 0.8871109456457335, |
|
"eval_loss": 0.4336757957935333, |
|
"eval_runtime": 51.7236, |
|
"eval_samples_per_second": 2768.739, |
|
"eval_steps_per_second": 57.691, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 5.916321383496139e-06, |
|
"loss": 0.3556, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_accuracy": 0.8856845589313521, |
|
"eval_f1": 0.8851065217265167, |
|
"eval_loss": 0.4344990849494934, |
|
"eval_runtime": 50.059, |
|
"eval_samples_per_second": 2860.803, |
|
"eval_steps_per_second": 59.61, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 5.629760336403372e-06, |
|
"loss": 0.3663, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_accuracy": 0.8836455809341591, |
|
"eval_f1": 0.8828440694403326, |
|
"eval_loss": 0.44171223044395447, |
|
"eval_runtime": 52.3029, |
|
"eval_samples_per_second": 2738.072, |
|
"eval_steps_per_second": 57.052, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 5.318456192197413e-06, |
|
"loss": 0.3902, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_accuracy": 0.8789252072146304, |
|
"eval_f1": 0.8780871328644805, |
|
"eval_loss": 0.4555477797985077, |
|
"eval_runtime": 50.7366, |
|
"eval_samples_per_second": 2822.599, |
|
"eval_steps_per_second": 58.814, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 4.986025615410962e-06, |
|
"loss": 0.4036, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_accuracy": 0.8788483963996676, |
|
"eval_f1": 0.877917228234661, |
|
"eval_loss": 0.4555710554122925, |
|
"eval_runtime": 49.8006, |
|
"eval_samples_per_second": 2875.647, |
|
"eval_steps_per_second": 59.919, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 4.636330712922542e-06, |
|
"loss": 0.4305, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"eval_accuracy": 0.8750637180624122, |
|
"eval_f1": 0.8741648169470942, |
|
"eval_loss": 0.46971216797828674, |
|
"eval_runtime": 50.8439, |
|
"eval_samples_per_second": 2816.642, |
|
"eval_steps_per_second": 58.689, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 4.273434164830173e-06, |
|
"loss": 0.4501, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"eval_accuracy": 0.8737788826121263, |
|
"eval_f1": 0.8725169321502271, |
|
"eval_loss": 0.4762924611568451, |
|
"eval_runtime": 50.5449, |
|
"eval_samples_per_second": 2833.301, |
|
"eval_steps_per_second": 59.037, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 3.901552025108571e-06, |
|
"loss": 0.4733, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"eval_accuracy": 0.8710486072802687, |
|
"eval_f1": 0.8700483733487925, |
|
"eval_loss": 0.48572415113449097, |
|
"eval_runtime": 51.8675, |
|
"eval_samples_per_second": 2761.054, |
|
"eval_steps_per_second": 57.531, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 3.5250047404016715e-06, |
|
"loss": 0.4851, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"eval_accuracy": 0.8705109315755295, |
|
"eval_f1": 0.8695447653344123, |
|
"eval_loss": 0.4862508177757263, |
|
"eval_runtime": 50.7033, |
|
"eval_samples_per_second": 2824.454, |
|
"eval_steps_per_second": 58.852, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 3.1481669560035742e-06, |
|
"loss": 0.4846, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.8708251576367407, |
|
"eval_f1": 0.869757227158951, |
|
"eval_loss": 0.48489654064178467, |
|
"eval_runtime": 50.2252, |
|
"eval_samples_per_second": 2851.338, |
|
"eval_steps_per_second": 59.412, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 2.775416692171258e-06, |
|
"loss": 0.4856, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"eval_accuracy": 0.870664553205455, |
|
"eval_f1": 0.8694665590028052, |
|
"eval_loss": 0.4835050404071808, |
|
"eval_runtime": 50.112, |
|
"eval_samples_per_second": 2857.776, |
|
"eval_steps_per_second": 59.547, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 2.411084481227692e-06, |
|
"loss": 0.4774, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"eval_accuracy": 0.8719214574502999, |
|
"eval_f1": 0.870818507712314, |
|
"eval_loss": 0.47967976331710815, |
|
"eval_runtime": 51.643, |
|
"eval_samples_per_second": 2773.058, |
|
"eval_steps_per_second": 57.781, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 2.059403056369544e-06, |
|
"loss": 0.4635, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_accuracy": 0.8728222388257721, |
|
"eval_f1": 0.8716593868274884, |
|
"eval_loss": 0.47762736678123474, |
|
"eval_runtime": 49.924, |
|
"eval_samples_per_second": 2868.539, |
|
"eval_steps_per_second": 59.771, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 1.7244581766840846e-06, |
|
"loss": 0.4561, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"eval_accuracy": 0.8738906074338904, |
|
"eval_f1": 0.8728714900247343, |
|
"eval_loss": 0.4745886027812958, |
|
"eval_runtime": 52.1859, |
|
"eval_samples_per_second": 2744.206, |
|
"eval_steps_per_second": 57.18, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 1.4101411596796607e-06, |
|
"loss": 0.4475, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"eval_accuracy": 0.8749380276379278, |
|
"eval_f1": 0.8739688992879336, |
|
"eval_loss": 0.4705161154270172, |
|
"eval_runtime": 52.4489, |
|
"eval_samples_per_second": 2730.449, |
|
"eval_steps_per_second": 56.893, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 1.1201036727965609e-06, |
|
"loss": 0.4413, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"eval_accuracy": 0.8754268237331453, |
|
"eval_f1": 0.8743869003587098, |
|
"eval_loss": 0.46908074617385864, |
|
"eval_runtime": 50.2149, |
|
"eval_samples_per_second": 2851.921, |
|
"eval_steps_per_second": 59.425, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 8.577153091208042e-07, |
|
"loss": 0.4389, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"eval_accuracy": 0.8759644994378845, |
|
"eval_f1": 0.875008769768229, |
|
"eval_loss": 0.46789219975471497, |
|
"eval_runtime": 50.3984, |
|
"eval_samples_per_second": 2841.539, |
|
"eval_steps_per_second": 59.208, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 6.260244401770287e-07, |
|
"loss": 0.4361, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"eval_accuracy": 0.875894671424282, |
|
"eval_f1": 0.8749081220424403, |
|
"eval_loss": 0.46769237518310547, |
|
"eval_runtime": 50.042, |
|
"eval_samples_per_second": 2861.778, |
|
"eval_steps_per_second": 59.63, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 4.2772280060435426e-07, |
|
"loss": 0.4362, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"eval_accuracy": 0.876257777095015, |
|
"eval_f1": 0.875265867499827, |
|
"eval_loss": 0.4671792984008789, |
|
"eval_runtime": 52.5275, |
|
"eval_samples_per_second": 2726.361, |
|
"eval_steps_per_second": 56.808, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 2.6511421616278837e-07, |
|
"loss": 0.4309, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"eval_accuracy": 0.87611812106781, |
|
"eval_f1": 0.8751424993393537, |
|
"eval_loss": 0.4671061038970947, |
|
"eval_runtime": 51.1386, |
|
"eval_samples_per_second": 2800.408, |
|
"eval_steps_per_second": 58.351, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 1.400878383814987e-07, |
|
"loss": 0.4316, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"eval_accuracy": 0.8764183815263007, |
|
"eval_f1": 0.8754265388560385, |
|
"eval_loss": 0.4670025110244751, |
|
"eval_runtime": 51.6718, |
|
"eval_samples_per_second": 2771.511, |
|
"eval_steps_per_second": 57.749, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 5.409619680302491e-08, |
|
"loss": 0.4321, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"eval_accuracy": 0.8764393299303814, |
|
"eval_f1": 0.8754605862048482, |
|
"eval_loss": 0.4668178856372833, |
|
"eval_runtime": 50.8951, |
|
"eval_samples_per_second": 2813.807, |
|
"eval_steps_per_second": 58.63, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 8.138323807738212e-09, |
|
"loss": 0.4311, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"eval_accuracy": 0.8763904503208597, |
|
"eval_f1": 0.8754165344722714, |
|
"eval_loss": 0.46675533056259155, |
|
"eval_runtime": 50.6992, |
|
"eval_samples_per_second": 2824.68, |
|
"eval_steps_per_second": 58.857, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 59265, |
|
"total_flos": 3.822729571998278e+17, |
|
"train_loss": 0.4135634679527994, |
|
"train_runtime": 19096.9194, |
|
"train_samples_per_second": 893.953, |
|
"train_steps_per_second": 3.103 |
|
} |
|
], |
|
"max_steps": 59265, |
|
"num_train_epochs": 15, |
|
"total_flos": 3.822729571998278e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|