|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 2188, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.997714808043876e-05, |
|
"loss": 5.5906, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9017367458866546e-05, |
|
"loss": 3.8301, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.7008, |
|
"eval_f1": 0.7372365339578454, |
|
"eval_loss": 1.7894935607910156, |
|
"eval_precision": 0.6572025052192066, |
|
"eval_recall": 0.8394666666666667, |
|
"eval_runtime": 100.3646, |
|
"eval_samples_per_second": 37.364, |
|
"eval_steps_per_second": 4.673, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8034734917733096e-05, |
|
"loss": 1.7461, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.7669333333333334, |
|
"eval_f1": 0.8065515714918104, |
|
"eval_loss": 1.6799604892730713, |
|
"eval_precision": 0.6893681422625804, |
|
"eval_recall": 0.9717333333333333, |
|
"eval_runtime": 100.5302, |
|
"eval_samples_per_second": 37.302, |
|
"eval_steps_per_second": 4.665, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.705210237659964e-05, |
|
"loss": 1.5511, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.8925333333333333, |
|
"eval_f1": 0.8882728028832826, |
|
"eval_loss": 0.5680333375930786, |
|
"eval_precision": 0.9249422632794457, |
|
"eval_recall": 0.8544, |
|
"eval_runtime": 100.4817, |
|
"eval_samples_per_second": 37.32, |
|
"eval_steps_per_second": 4.668, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.606946983546618e-05, |
|
"loss": 1.2304, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.7061333333333333, |
|
"eval_f1": 0.5838368580060423, |
|
"eval_loss": 4.393102169036865, |
|
"eval_precision": 1.0, |
|
"eval_recall": 0.41226666666666667, |
|
"eval_runtime": 100.2927, |
|
"eval_samples_per_second": 37.391, |
|
"eval_steps_per_second": 4.676, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5086837294332726e-05, |
|
"loss": 1.6111, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.7248, |
|
"eval_f1": 0.7810776410691558, |
|
"eval_loss": 1.275352954864502, |
|
"eval_precision": 0.6484677703416696, |
|
"eval_recall": 0.9818666666666667, |
|
"eval_runtime": 100.7588, |
|
"eval_samples_per_second": 37.218, |
|
"eval_steps_per_second": 4.655, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.410420475319927e-05, |
|
"loss": 1.1002, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.848, |
|
"eval_f1": 0.8375142531356898, |
|
"eval_loss": 0.5289755463600159, |
|
"eval_precision": 0.8995713410900184, |
|
"eval_recall": 0.7834666666666666, |
|
"eval_runtime": 100.7268, |
|
"eval_samples_per_second": 37.229, |
|
"eval_steps_per_second": 4.656, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.312157221206581e-05, |
|
"loss": 0.6462, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.864, |
|
"eval_f1": 0.8452669902912622, |
|
"eval_loss": 0.6743063926696777, |
|
"eval_precision": 0.9802955665024631, |
|
"eval_recall": 0.7429333333333333, |
|
"eval_runtime": 100.4683, |
|
"eval_samples_per_second": 37.325, |
|
"eval_steps_per_second": 4.668, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.213893967093236e-05, |
|
"loss": 0.6835, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.6992, |
|
"eval_f1": 0.7657807308970099, |
|
"eval_loss": 0.9862114787101746, |
|
"eval_precision": 0.6269976198571914, |
|
"eval_recall": 0.9834666666666667, |
|
"eval_runtime": 100.9462, |
|
"eval_samples_per_second": 37.148, |
|
"eval_steps_per_second": 4.646, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1156307129798905e-05, |
|
"loss": 0.5506, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.7984, |
|
"eval_f1": 0.7476635514018691, |
|
"eval_loss": 1.4477732181549072, |
|
"eval_precision": 0.9991079393398751, |
|
"eval_recall": 0.5973333333333334, |
|
"eval_runtime": 101.0116, |
|
"eval_samples_per_second": 37.124, |
|
"eval_steps_per_second": 4.643, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.017367458866545e-05, |
|
"loss": 1.0906, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.9165333333333333, |
|
"eval_f1": 0.9144575020497402, |
|
"eval_loss": 0.2948032021522522, |
|
"eval_precision": 0.9377802690582959, |
|
"eval_recall": 0.8922666666666667, |
|
"eval_runtime": 100.9951, |
|
"eval_samples_per_second": 37.131, |
|
"eval_steps_per_second": 4.644, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.919104204753199e-05, |
|
"loss": 0.4459, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9082666666666667, |
|
"eval_f1": 0.9078242229367631, |
|
"eval_loss": 0.5833027362823486, |
|
"eval_precision": 0.9122240172320948, |
|
"eval_recall": 0.9034666666666666, |
|
"eval_runtime": 100.5094, |
|
"eval_samples_per_second": 37.31, |
|
"eval_steps_per_second": 4.666, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.820840950639854e-05, |
|
"loss": 0.4996, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.9210666666666667, |
|
"eval_f1": 0.9218172213417856, |
|
"eval_loss": 0.6353188157081604, |
|
"eval_precision": 0.913134484563056, |
|
"eval_recall": 0.9306666666666666, |
|
"eval_runtime": 100.6316, |
|
"eval_samples_per_second": 37.265, |
|
"eval_steps_per_second": 4.661, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7225776965265085e-05, |
|
"loss": 0.4613, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.9290666666666667, |
|
"eval_f1": 0.926923076923077, |
|
"eval_loss": 0.43136465549468994, |
|
"eval_precision": 0.9558073654390935, |
|
"eval_recall": 0.8997333333333334, |
|
"eval_runtime": 100.9532, |
|
"eval_samples_per_second": 37.146, |
|
"eval_steps_per_second": 4.646, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.624314442413163e-05, |
|
"loss": 0.3868, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.8997333333333334, |
|
"eval_f1": 0.8932424758659853, |
|
"eval_loss": 0.6684072017669678, |
|
"eval_precision": 0.9550698239222829, |
|
"eval_recall": 0.8389333333333333, |
|
"eval_runtime": 100.7051, |
|
"eval_samples_per_second": 37.237, |
|
"eval_steps_per_second": 4.657, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.526051188299818e-05, |
|
"loss": 0.3524, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.9269333333333334, |
|
"eval_f1": 0.9261057173678533, |
|
"eval_loss": 0.49305853247642517, |
|
"eval_precision": 0.9367157665030006, |
|
"eval_recall": 0.9157333333333333, |
|
"eval_runtime": 101.0904, |
|
"eval_samples_per_second": 37.096, |
|
"eval_steps_per_second": 4.639, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.4277879341864715e-05, |
|
"loss": 0.4739, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.9192, |
|
"eval_f1": 0.9207428720899816, |
|
"eval_loss": 0.4122666120529175, |
|
"eval_precision": 0.9034907597535934, |
|
"eval_recall": 0.9386666666666666, |
|
"eval_runtime": 101.3837, |
|
"eval_samples_per_second": 36.988, |
|
"eval_steps_per_second": 4.626, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.329524680073126e-05, |
|
"loss": 0.5484, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.6853333333333333, |
|
"eval_f1": 0.7321833862914208, |
|
"eval_loss": 0.6493818759918213, |
|
"eval_precision": 0.6372975108652706, |
|
"eval_recall": 0.8602666666666666, |
|
"eval_runtime": 100.5978, |
|
"eval_samples_per_second": 37.277, |
|
"eval_steps_per_second": 4.662, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.231261425959781e-05, |
|
"loss": 0.4939, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.8562666666666666, |
|
"eval_f1": 0.8371109096403747, |
|
"eval_loss": 0.43780916929244995, |
|
"eval_precision": 0.9658298465829847, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 101.7646, |
|
"eval_samples_per_second": 36.85, |
|
"eval_steps_per_second": 4.609, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.132998171846435e-05, |
|
"loss": 0.3646, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.9093333333333333, |
|
"eval_f1": 0.9018475750577367, |
|
"eval_loss": 0.5472243428230286, |
|
"eval_precision": 0.9830081812460667, |
|
"eval_recall": 0.8330666666666666, |
|
"eval_runtime": 100.6656, |
|
"eval_samples_per_second": 37.252, |
|
"eval_steps_per_second": 4.659, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.03473491773309e-05, |
|
"loss": 0.3954, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.9216, |
|
"eval_f1": 0.9219330855018588, |
|
"eval_loss": 0.3911387324333191, |
|
"eval_precision": 0.9180327868852459, |
|
"eval_recall": 0.9258666666666666, |
|
"eval_runtime": 101.1862, |
|
"eval_samples_per_second": 37.06, |
|
"eval_steps_per_second": 4.635, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9364716636197442e-05, |
|
"loss": 0.3898, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.8677333333333334, |
|
"eval_f1": 0.8485958485958486, |
|
"eval_loss": 0.608423113822937, |
|
"eval_precision": 0.9921484653818701, |
|
"eval_recall": 0.7413333333333333, |
|
"eval_runtime": 100.7786, |
|
"eval_samples_per_second": 37.21, |
|
"eval_steps_per_second": 4.654, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.838208409506399e-05, |
|
"loss": 0.5361, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.9045333333333333, |
|
"eval_f1": 0.8954439252336449, |
|
"eval_loss": 0.5845316648483276, |
|
"eval_precision": 0.9896707553260168, |
|
"eval_recall": 0.8176, |
|
"eval_runtime": 100.5268, |
|
"eval_samples_per_second": 37.303, |
|
"eval_steps_per_second": 4.665, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.739945155393053e-05, |
|
"loss": 0.3488, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.9032, |
|
"eval_f1": 0.90854119425548, |
|
"eval_loss": 0.6275143027305603, |
|
"eval_precision": 0.8610315186246418, |
|
"eval_recall": 0.9616, |
|
"eval_runtime": 100.5257, |
|
"eval_samples_per_second": 37.304, |
|
"eval_steps_per_second": 4.665, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.641681901279708e-05, |
|
"loss": 0.6024, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.7704, |
|
"eval_f1": 0.8113910186199343, |
|
"eval_loss": 1.486302375793457, |
|
"eval_precision": 0.6884758364312268, |
|
"eval_recall": 0.9877333333333334, |
|
"eval_runtime": 100.9298, |
|
"eval_samples_per_second": 37.155, |
|
"eval_steps_per_second": 4.647, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5434186471663625e-05, |
|
"loss": 0.6391, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.9296, |
|
"eval_f1": 0.9263803680981595, |
|
"eval_loss": 0.47947388887405396, |
|
"eval_precision": 0.9707773232028054, |
|
"eval_recall": 0.8858666666666667, |
|
"eval_runtime": 100.5142, |
|
"eval_samples_per_second": 37.308, |
|
"eval_steps_per_second": 4.666, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4451553930530165e-05, |
|
"loss": 0.6603, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.9266666666666666, |
|
"eval_f1": 0.9240541286937309, |
|
"eval_loss": 0.35503658652305603, |
|
"eval_precision": 0.9581901489117984, |
|
"eval_recall": 0.8922666666666667, |
|
"eval_runtime": 100.9348, |
|
"eval_samples_per_second": 37.153, |
|
"eval_steps_per_second": 4.647, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.346892138939671e-05, |
|
"loss": 0.3186, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.9317333333333333, |
|
"eval_f1": 0.9288493607559756, |
|
"eval_loss": 0.4835192561149597, |
|
"eval_precision": 0.9698200812536274, |
|
"eval_recall": 0.8912, |
|
"eval_runtime": 100.4468, |
|
"eval_samples_per_second": 37.333, |
|
"eval_steps_per_second": 4.669, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2486288848263255e-05, |
|
"loss": 0.3122, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.9328, |
|
"eval_f1": 0.9340314136125655, |
|
"eval_loss": 0.31904712319374084, |
|
"eval_precision": 0.9172236503856042, |
|
"eval_recall": 0.9514666666666667, |
|
"eval_runtime": 100.4601, |
|
"eval_samples_per_second": 37.328, |
|
"eval_steps_per_second": 4.669, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1503656307129798e-05, |
|
"loss": 0.4917, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.9192, |
|
"eval_f1": 0.9151498179781573, |
|
"eval_loss": 0.45495307445526123, |
|
"eval_precision": 0.9634433962264151, |
|
"eval_recall": 0.8714666666666666, |
|
"eval_runtime": 100.5501, |
|
"eval_samples_per_second": 37.295, |
|
"eval_steps_per_second": 4.664, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0521023765996345e-05, |
|
"loss": 0.448, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.936, |
|
"eval_f1": 0.9338113623827909, |
|
"eval_loss": 0.18482676148414612, |
|
"eval_precision": 0.9668760708166761, |
|
"eval_recall": 0.9029333333333334, |
|
"eval_runtime": 100.3779, |
|
"eval_samples_per_second": 37.359, |
|
"eval_steps_per_second": 4.672, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.953839122486289e-05, |
|
"loss": 0.3064, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.9402666666666667, |
|
"eval_f1": 0.9405204460966542, |
|
"eval_loss": 0.23262375593185425, |
|
"eval_precision": 0.9365415124272871, |
|
"eval_recall": 0.9445333333333333, |
|
"eval_runtime": 101.1761, |
|
"eval_samples_per_second": 37.064, |
|
"eval_steps_per_second": 4.635, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8555758683729435e-05, |
|
"loss": 0.2274, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.924, |
|
"eval_f1": 0.9256844850065189, |
|
"eval_loss": 0.28850072622299194, |
|
"eval_precision": 0.9056122448979592, |
|
"eval_recall": 0.9466666666666667, |
|
"eval_runtime": 100.8319, |
|
"eval_samples_per_second": 37.191, |
|
"eval_steps_per_second": 4.651, |
|
"step": 1376 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7573126142595978e-05, |
|
"loss": 0.3728, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.9301333333333334, |
|
"eval_f1": 0.9273433166943982, |
|
"eval_loss": 0.29230690002441406, |
|
"eval_precision": 0.9659156556903524, |
|
"eval_recall": 0.8917333333333334, |
|
"eval_runtime": 100.785, |
|
"eval_samples_per_second": 37.208, |
|
"eval_steps_per_second": 4.653, |
|
"step": 1419 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.659049360146252e-05, |
|
"loss": 0.5048, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9464, |
|
"eval_f1": 0.9464713715046603, |
|
"eval_loss": 0.2164454311132431, |
|
"eval_precision": 0.9452127659574469, |
|
"eval_recall": 0.9477333333333333, |
|
"eval_runtime": 100.7467, |
|
"eval_samples_per_second": 37.222, |
|
"eval_steps_per_second": 4.655, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5607861060329068e-05, |
|
"loss": 0.4483, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.9250666666666667, |
|
"eval_f1": 0.9274464239607538, |
|
"eval_loss": 0.3050013482570648, |
|
"eval_precision": 0.8988988988988988, |
|
"eval_recall": 0.9578666666666666, |
|
"eval_runtime": 100.8928, |
|
"eval_samples_per_second": 37.168, |
|
"eval_steps_per_second": 4.648, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4625228519195613e-05, |
|
"loss": 0.3347, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.9466666666666667, |
|
"eval_f1": 0.9471179270227393, |
|
"eval_loss": 0.22358763217926025, |
|
"eval_precision": 0.9391714735186156, |
|
"eval_recall": 0.9552, |
|
"eval_runtime": 100.8242, |
|
"eval_samples_per_second": 37.193, |
|
"eval_steps_per_second": 4.652, |
|
"step": 1548 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3642595978062158e-05, |
|
"loss": 0.31, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.9386666666666666, |
|
"eval_f1": 0.9386011745862254, |
|
"eval_loss": 0.18921497464179993, |
|
"eval_precision": 0.9396044895777659, |
|
"eval_recall": 0.9376, |
|
"eval_runtime": 100.5904, |
|
"eval_samples_per_second": 37.28, |
|
"eval_steps_per_second": 4.662, |
|
"step": 1591 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2659963436928701e-05, |
|
"loss": 0.1936, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.9424, |
|
"eval_f1": 0.9430079155672824, |
|
"eval_loss": 0.24673737585544586, |
|
"eval_precision": 0.933159268929504, |
|
"eval_recall": 0.9530666666666666, |
|
"eval_runtime": 100.7748, |
|
"eval_samples_per_second": 37.212, |
|
"eval_steps_per_second": 4.654, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1677330895795248e-05, |
|
"loss": 0.2855, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.9384, |
|
"eval_f1": 0.9391304347826087, |
|
"eval_loss": 0.37995821237564087, |
|
"eval_precision": 0.928125, |
|
"eval_recall": 0.9504, |
|
"eval_runtime": 100.6345, |
|
"eval_samples_per_second": 37.264, |
|
"eval_steps_per_second": 4.66, |
|
"step": 1677 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0694698354661791e-05, |
|
"loss": 0.2566, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.9370666666666667, |
|
"eval_f1": 0.9371671991480297, |
|
"eval_loss": 0.38548287749290466, |
|
"eval_precision": 0.935672514619883, |
|
"eval_recall": 0.9386666666666666, |
|
"eval_runtime": 101.0016, |
|
"eval_samples_per_second": 37.128, |
|
"eval_steps_per_second": 4.643, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.712065813528338e-06, |
|
"loss": 0.2966, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.9512, |
|
"eval_f1": 0.950068212824011, |
|
"eval_loss": 0.28995445370674133, |
|
"eval_precision": 0.9726256983240223, |
|
"eval_recall": 0.9285333333333333, |
|
"eval_runtime": 100.6179, |
|
"eval_samples_per_second": 37.27, |
|
"eval_steps_per_second": 4.661, |
|
"step": 1763 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.729433272394881e-06, |
|
"loss": 0.2485, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.9549333333333333, |
|
"eval_f1": 0.9545087483176311, |
|
"eval_loss": 0.24945972859859467, |
|
"eval_precision": 0.9635869565217391, |
|
"eval_recall": 0.9456, |
|
"eval_runtime": 100.5538, |
|
"eval_samples_per_second": 37.293, |
|
"eval_steps_per_second": 4.664, |
|
"step": 1806 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.746800731261426e-06, |
|
"loss": 0.2553, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.9357333333333333, |
|
"eval_f1": 0.9367620047231697, |
|
"eval_loss": 0.394607812166214, |
|
"eval_precision": 0.922004132231405, |
|
"eval_recall": 0.952, |
|
"eval_runtime": 100.5784, |
|
"eval_samples_per_second": 37.284, |
|
"eval_steps_per_second": 4.663, |
|
"step": 1849 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.764168190127972e-06, |
|
"loss": 0.2719, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.9533333333333334, |
|
"eval_f1": 0.9524068534131085, |
|
"eval_loss": 0.361551433801651, |
|
"eval_precision": 0.9716981132075472, |
|
"eval_recall": 0.9338666666666666, |
|
"eval_runtime": 100.5791, |
|
"eval_samples_per_second": 37.284, |
|
"eval_steps_per_second": 4.663, |
|
"step": 1892 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5.781535648994515e-06, |
|
"loss": 0.2925, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.9541333333333334, |
|
"eval_f1": 0.9532862574687669, |
|
"eval_loss": 0.30848976969718933, |
|
"eval_precision": 0.9712230215827338, |
|
"eval_recall": 0.936, |
|
"eval_runtime": 100.6234, |
|
"eval_samples_per_second": 37.268, |
|
"eval_steps_per_second": 4.661, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.798903107861061e-06, |
|
"loss": 0.2477, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.9544, |
|
"eval_f1": 0.9536710918450284, |
|
"eval_loss": 0.28153562545776367, |
|
"eval_precision": 0.9691629955947136, |
|
"eval_recall": 0.9386666666666666, |
|
"eval_runtime": 100.4809, |
|
"eval_samples_per_second": 37.321, |
|
"eval_steps_per_second": 4.668, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.816270566727605e-06, |
|
"loss": 0.2164, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.9568, |
|
"eval_f1": 0.9560499186109603, |
|
"eval_loss": 0.24747176468372345, |
|
"eval_precision": 0.9729431253451132, |
|
"eval_recall": 0.9397333333333333, |
|
"eval_runtime": 100.5076, |
|
"eval_samples_per_second": 37.311, |
|
"eval_steps_per_second": 4.666, |
|
"step": 2021 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.83363802559415e-06, |
|
"loss": 0.2816, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9570666666666666, |
|
"eval_f1": 0.9563093622795115, |
|
"eval_loss": 0.263465017080307, |
|
"eval_precision": 0.9734806629834254, |
|
"eval_recall": 0.9397333333333333, |
|
"eval_runtime": 100.4934, |
|
"eval_samples_per_second": 37.316, |
|
"eval_steps_per_second": 4.667, |
|
"step": 2064 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.851005484460695e-06, |
|
"loss": 0.351, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.9573333333333334, |
|
"eval_f1": 0.9566160520607375, |
|
"eval_loss": 0.23055072128772736, |
|
"eval_precision": 0.972972972972973, |
|
"eval_recall": 0.9408, |
|
"eval_runtime": 100.5497, |
|
"eval_samples_per_second": 37.295, |
|
"eval_steps_per_second": 4.664, |
|
"step": 2107 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.683729433272396e-07, |
|
"loss": 0.2591, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.9562666666666667, |
|
"eval_f1": 0.955288985823337, |
|
"eval_loss": 0.2391371726989746, |
|
"eval_precision": 0.9771332961517011, |
|
"eval_recall": 0.9344, |
|
"eval_runtime": 100.6094, |
|
"eval_samples_per_second": 37.273, |
|
"eval_steps_per_second": 4.662, |
|
"step": 2150 |
|
} |
|
], |
|
"max_steps": 2188, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.62524823552e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|