|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 81700, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.119951040391677e-09, |
|
"loss": 1.9236, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.5299877600979192e-06, |
|
"loss": 1.7998, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.0599755201958383e-06, |
|
"loss": 1.4687, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.589963280293758e-06, |
|
"loss": 1.3699, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5730027548209367, |
|
"eval_f1": 0.10407805854390793, |
|
"eval_loss": 1.3314192295074463, |
|
"eval_precision": 0.08185753640299095, |
|
"eval_recall": 0.14285714285714285, |
|
"eval_runtime": 0.5773, |
|
"eval_samples_per_second": 628.824, |
|
"eval_steps_per_second": 79.686, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.119951040391677e-06, |
|
"loss": 1.3287, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.649938800489596e-06, |
|
"loss": 1.3002, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.179926560587515e-06, |
|
"loss": 1.258, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6060606060606061, |
|
"eval_f1": 0.183279210886012, |
|
"eval_loss": 1.1635808944702148, |
|
"eval_precision": 0.15974025974025974, |
|
"eval_recall": 0.21892762410003788, |
|
"eval_runtime": 0.5777, |
|
"eval_samples_per_second": 628.372, |
|
"eval_steps_per_second": 79.628, |
|
"step": 1634 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.0709914320685436e-05, |
|
"loss": 1.1884, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.2239902080783353e-05, |
|
"loss": 1.1126, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.3769889840881272e-05, |
|
"loss": 1.1123, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6143250688705234, |
|
"eval_f1": 0.342984622186972, |
|
"eval_loss": 1.053414225578308, |
|
"eval_precision": 0.31294195637816785, |
|
"eval_recall": 0.3835992619859615, |
|
"eval_runtime": 0.5781, |
|
"eval_samples_per_second": 627.939, |
|
"eval_steps_per_second": 79.574, |
|
"step": 2451 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.5299877600979193e-05, |
|
"loss": 1.0996, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.6829865361077112e-05, |
|
"loss": 0.9999, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.835985312117503e-05, |
|
"loss": 1.0141, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.988984088127295e-05, |
|
"loss": 0.993, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6528925619834711, |
|
"eval_f1": 0.377005484318265, |
|
"eval_loss": 1.0341299772262573, |
|
"eval_precision": 0.4038621706705789, |
|
"eval_recall": 0.4236673117522871, |
|
"eval_runtime": 0.5774, |
|
"eval_samples_per_second": 628.627, |
|
"eval_steps_per_second": 79.661, |
|
"step": 3268 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.1419828641370872e-05, |
|
"loss": 0.8827, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 2.294981640146879e-05, |
|
"loss": 0.9059, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.4479804161566707e-05, |
|
"loss": 0.8739, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6611570247933884, |
|
"eval_f1": 0.3730793016507302, |
|
"eval_loss": 0.9818496704101562, |
|
"eval_precision": 0.48520306297883803, |
|
"eval_recall": 0.39812740967297616, |
|
"eval_runtime": 0.5781, |
|
"eval_samples_per_second": 627.881, |
|
"eval_steps_per_second": 79.566, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 2.600979192166463e-05, |
|
"loss": 0.8024, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 2.7539779681762544e-05, |
|
"loss": 0.8021, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 2.9069767441860467e-05, |
|
"loss": 0.8587, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6446280991735537, |
|
"eval_f1": 0.372084738159454, |
|
"eval_loss": 1.1154053211212158, |
|
"eval_precision": 0.4065005040497793, |
|
"eval_recall": 0.38243464936814686, |
|
"eval_runtime": 0.5773, |
|
"eval_samples_per_second": 628.792, |
|
"eval_steps_per_second": 79.682, |
|
"step": 4902 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 3.0599755201958386e-05, |
|
"loss": 0.7567, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 3.2123623011015916e-05, |
|
"loss": 0.6544, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 3.365361077111383e-05, |
|
"loss": 0.7251, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.650137741046832, |
|
"eval_f1": 0.47516667580958355, |
|
"eval_loss": 1.0870000123977661, |
|
"eval_precision": 0.4639813382108336, |
|
"eval_recall": 0.4947985272800543, |
|
"eval_runtime": 0.5771, |
|
"eval_samples_per_second": 628.974, |
|
"eval_steps_per_second": 79.705, |
|
"step": 5719 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 3.5183598531211754e-05, |
|
"loss": 0.6829, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 3.6707466340269284e-05, |
|
"loss": 0.5838, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 3.82374541003672e-05, |
|
"loss": 0.6713, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 3.9767441860465115e-05, |
|
"loss": 0.6269, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5950413223140496, |
|
"eval_f1": 0.44966469663834185, |
|
"eval_loss": 1.2386085987091064, |
|
"eval_precision": 0.4427425690783476, |
|
"eval_recall": 0.5007950793049315, |
|
"eval_runtime": 0.5812, |
|
"eval_samples_per_second": 624.61, |
|
"eval_steps_per_second": 79.152, |
|
"step": 6536 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 4.129742962056304e-05, |
|
"loss": 0.5744, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 4.282741738066095e-05, |
|
"loss": 0.5549, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 4.435740514075888e-05, |
|
"loss": 0.6049, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6198347107438017, |
|
"eval_f1": 0.32356296980065924, |
|
"eval_loss": 1.368962287902832, |
|
"eval_precision": 0.47826317360800125, |
|
"eval_recall": 0.30602226522793025, |
|
"eval_runtime": 0.5769, |
|
"eval_samples_per_second": 629.173, |
|
"eval_steps_per_second": 79.73, |
|
"step": 7353 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 4.58873929008568e-05, |
|
"loss": 0.5767, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 4.741738066095471e-05, |
|
"loss": 0.5394, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 4.894124847001224e-05, |
|
"loss": 0.5507, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6446280991735537, |
|
"eval_f1": 0.4573642555682361, |
|
"eval_loss": 1.2706860303878784, |
|
"eval_precision": 0.4887882409493839, |
|
"eval_recall": 0.4411244714508261, |
|
"eval_runtime": 0.5758, |
|
"eval_samples_per_second": 630.429, |
|
"eval_steps_per_second": 79.889, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 4.99998647112497e-05, |
|
"loss": 0.609, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 4.999756011662697e-05, |
|
"loss": 0.5261, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 4.9992429864131085e-05, |
|
"loss": 0.5329, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6611570247933884, |
|
"eval_f1": 0.44773949967842547, |
|
"eval_loss": 1.4297670125961304, |
|
"eval_precision": 0.4707209760487202, |
|
"eval_recall": 0.4446715025532759, |
|
"eval_runtime": 0.5769, |
|
"eval_samples_per_second": 629.193, |
|
"eval_steps_per_second": 79.732, |
|
"step": 8987 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.998443333261708e-05, |
|
"loss": 0.5434, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 4.997358633418236e-05, |
|
"loss": 0.4591, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 4.9959890106357945e-05, |
|
"loss": 0.5308, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 4.9943346211742556e-05, |
|
"loss": 0.5027, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6776859504132231, |
|
"eval_f1": 0.4263136100758106, |
|
"eval_loss": 1.5769587755203247, |
|
"eval_precision": 0.4877654234638867, |
|
"eval_recall": 0.42436502673571636, |
|
"eval_runtime": 0.5768, |
|
"eval_samples_per_second": 629.365, |
|
"eval_steps_per_second": 79.754, |
|
"step": 9804 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 4.9923956537824456e-05, |
|
"loss": 0.4841, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 4.990172329676604e-05, |
|
"loss": 0.4372, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 4.987664902515149e-05, |
|
"loss": 0.4622, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7052341597796143, |
|
"eval_f1": 0.4098788238632856, |
|
"eval_loss": 1.5736510753631592, |
|
"eval_precision": 0.4891951861291893, |
|
"eval_recall": 0.38998470741081576, |
|
"eval_runtime": 0.5769, |
|
"eval_samples_per_second": 629.25, |
|
"eval_steps_per_second": 79.74, |
|
"step": 10621 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 4.984873658369735e-05, |
|
"loss": 0.4393, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 4.981798915692615e-05, |
|
"loss": 0.4071, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 4.978441025280311e-05, |
|
"loss": 0.4388, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6942148760330579, |
|
"eval_f1": 0.4961367511874214, |
|
"eval_loss": 1.7623515129089355, |
|
"eval_precision": 0.5284679001386023, |
|
"eval_recall": 0.4817265964741334, |
|
"eval_runtime": 0.5819, |
|
"eval_samples_per_second": 623.821, |
|
"eval_steps_per_second": 79.052, |
|
"step": 11438 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 4.9748003702335874e-05, |
|
"loss": 0.361, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 4.970893619788296e-05, |
|
"loss": 0.3574, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 4.966689840442375e-05, |
|
"loss": 0.3927, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 4.9622046371513854e-05, |
|
"loss": 0.3973, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.650137741046832, |
|
"eval_f1": 0.45953933518622214, |
|
"eval_loss": 1.8296639919281006, |
|
"eval_precision": 0.547006976703725, |
|
"eval_recall": 0.4533570015097109, |
|
"eval_runtime": 0.5782, |
|
"eval_samples_per_second": 627.762, |
|
"eval_steps_per_second": 79.551, |
|
"step": 12255 |
|
}, |
|
{ |
|
"epoch": 15.3, |
|
"learning_rate": 4.957438521630898e-05, |
|
"loss": 0.3236, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 4.952392037645683e-05, |
|
"loss": 0.3474, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 4.94706576094767e-05, |
|
"loss": 0.3578, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7079889807162535, |
|
"eval_f1": 0.49300722979484873, |
|
"eval_loss": 1.4506721496582031, |
|
"eval_precision": 0.5450297500169623, |
|
"eval_recall": 0.4661049985865257, |
|
"eval_runtime": 0.5767, |
|
"eval_samples_per_second": 629.403, |
|
"eval_steps_per_second": 79.759, |
|
"step": 13072 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 4.9414602992102564e-05, |
|
"loss": 0.3665, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 4.9355762919589846e-05, |
|
"loss": 0.3197, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 16.83, |
|
"learning_rate": 4.929414410498574e-05, |
|
"loss": 0.3632, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6336088154269972, |
|
"eval_f1": 0.44717986010161176, |
|
"eval_loss": 1.922100305557251, |
|
"eval_precision": 0.45028944394750214, |
|
"eval_recall": 0.46335067846767364, |
|
"eval_runtime": 0.5779, |
|
"eval_samples_per_second": 628.173, |
|
"eval_steps_per_second": 79.603, |
|
"step": 13889 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 4.922975357836337e-05, |
|
"loss": 0.3885, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 17.44, |
|
"learning_rate": 4.916259868601966e-05, |
|
"loss": 0.3569, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"learning_rate": 4.909268708963725e-05, |
|
"loss": 0.3409, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.50463296007239, |
|
"eval_loss": 1.6839758157730103, |
|
"eval_precision": 0.532328893998914, |
|
"eval_recall": 0.4880845242606326, |
|
"eval_runtime": 0.5777, |
|
"eval_samples_per_second": 628.346, |
|
"eval_steps_per_second": 79.625, |
|
"step": 14706 |
|
}, |
|
{ |
|
"epoch": 18.05, |
|
"learning_rate": 4.902002676541038e-05, |
|
"loss": 0.3504, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 18.36, |
|
"learning_rate": 4.894462600313483e-05, |
|
"loss": 0.3144, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 4.8866493405262234e-05, |
|
"loss": 0.3445, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 4.878563788591853e-05, |
|
"loss": 0.2723, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.696969696969697, |
|
"eval_f1": 0.5104610371804872, |
|
"eval_loss": 1.8228646516799927, |
|
"eval_precision": 0.5547209922405293, |
|
"eval_recall": 0.49795298092711887, |
|
"eval_runtime": 0.5785, |
|
"eval_samples_per_second": 627.463, |
|
"eval_steps_per_second": 79.513, |
|
"step": 15523 |
|
}, |
|
{ |
|
"epoch": 19.28, |
|
"learning_rate": 4.8702068669887026e-05, |
|
"loss": 0.2524, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 19.58, |
|
"learning_rate": 4.8615795291555885e-05, |
|
"loss": 0.2412, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"learning_rate": 4.852682759383042e-05, |
|
"loss": 0.3177, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6914600550964187, |
|
"eval_f1": 0.4887299967298263, |
|
"eval_loss": 1.9973198175430298, |
|
"eval_precision": 0.6312488850962071, |
|
"eval_recall": 0.4400535390991056, |
|
"eval_runtime": 0.5767, |
|
"eval_samples_per_second": 629.418, |
|
"eval_steps_per_second": 79.761, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 20.2, |
|
"learning_rate": 4.84355476675853e-05, |
|
"loss": 0.2544, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 20.5, |
|
"learning_rate": 4.8341232761821675e-05, |
|
"loss": 0.2752, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 20.81, |
|
"learning_rate": 4.824425486142494e-05, |
|
"loss": 0.3368, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6831955922865014, |
|
"eval_f1": 0.45711959899817944, |
|
"eval_loss": 1.6511964797973633, |
|
"eval_precision": 0.5667705375640166, |
|
"eval_recall": 0.4405123543670341, |
|
"eval_runtime": 0.5771, |
|
"eval_samples_per_second": 629.043, |
|
"eval_steps_per_second": 79.713, |
|
"step": 17157 |
|
}, |
|
{ |
|
"epoch": 21.11, |
|
"learning_rate": 4.81446250305763e-05, |
|
"loss": 0.2763, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 21.42, |
|
"learning_rate": 4.804235463601491e-05, |
|
"loss": 0.2232, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 21.73, |
|
"learning_rate": 4.793745534574111e-05, |
|
"loss": 0.2307, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.696969696969697, |
|
"eval_f1": 0.4706521650995364, |
|
"eval_loss": 1.9451159238815308, |
|
"eval_precision": 0.6334310394975756, |
|
"eval_recall": 0.4397407703134304, |
|
"eval_runtime": 0.5762, |
|
"eval_samples_per_second": 630.015, |
|
"eval_steps_per_second": 79.837, |
|
"step": 17974 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"learning_rate": 4.782993912768523e-05, |
|
"loss": 0.2376, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 22.34, |
|
"learning_rate": 4.7719818248342104e-05, |
|
"loss": 0.2096, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 4.760756126979301e-05, |
|
"loss": 0.225, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 22.95, |
|
"learning_rate": 4.7492279345533425e-05, |
|
"loss": 0.259, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.4849675763556479, |
|
"eval_loss": 2.131509304046631, |
|
"eval_precision": 0.6931105898377018, |
|
"eval_recall": 0.424335591884853, |
|
"eval_runtime": 0.5766, |
|
"eval_samples_per_second": 629.576, |
|
"eval_steps_per_second": 79.781, |
|
"step": 18791 |
|
}, |
|
{ |
|
"epoch": 23.26, |
|
"learning_rate": 4.737443128349549e-05, |
|
"loss": 0.2214, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 23.56, |
|
"learning_rate": 4.7254030528931215e-05, |
|
"loss": 0.2338, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 23.87, |
|
"learning_rate": 4.7131090818328586e-05, |
|
"loss": 0.2387, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7024793388429752, |
|
"eval_f1": 0.5396161727004175, |
|
"eval_loss": 1.6646941900253296, |
|
"eval_precision": 0.5457638718508284, |
|
"eval_recall": 0.5445803409781244, |
|
"eval_runtime": 0.577, |
|
"eval_samples_per_second": 629.121, |
|
"eval_steps_per_second": 79.723, |
|
"step": 19608 |
|
}, |
|
{ |
|
"epoch": 24.17, |
|
"learning_rate": 4.700562617784434e-05, |
|
"loss": 0.2034, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"learning_rate": 4.687765092170378e-05, |
|
"loss": 0.1752, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 24.79, |
|
"learning_rate": 4.6747179650567614e-05, |
|
"loss": 0.2089, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6721763085399449, |
|
"eval_f1": 0.4391025799309439, |
|
"eval_loss": 2.24601674079895, |
|
"eval_precision": 0.4972889051140895, |
|
"eval_recall": 0.428331664230679, |
|
"eval_runtime": 0.5771, |
|
"eval_samples_per_second": 629.0, |
|
"eval_steps_per_second": 79.708, |
|
"step": 20425 |
|
}, |
|
{ |
|
"epoch": 25.09, |
|
"learning_rate": 4.661422724986621e-05, |
|
"loss": 0.2451, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 25.4, |
|
"learning_rate": 4.647880888810129e-05, |
|
"loss": 0.1822, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 25.7, |
|
"learning_rate": 4.634094001511539e-05, |
|
"loss": 0.2117, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6721763085399449, |
|
"eval_f1": 0.5210259674763756, |
|
"eval_loss": 2.145235300064087, |
|
"eval_precision": 0.5857309337602864, |
|
"eval_recall": 0.48917402725900266, |
|
"eval_runtime": 0.5764, |
|
"eval_samples_per_second": 629.763, |
|
"eval_steps_per_second": 79.805, |
|
"step": 21242 |
|
}, |
|
{ |
|
"epoch": 26.01, |
|
"learning_rate": 4.620063636032916e-05, |
|
"loss": 0.2111, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 4.605791393094685e-05, |
|
"loss": 0.1778, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 26.62, |
|
"learning_rate": 4.591278901012997e-05, |
|
"loss": 0.2428, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 26.93, |
|
"learning_rate": 4.576587292912579e-05, |
|
"loss": 0.2081, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6749311294765841, |
|
"eval_f1": 0.43598115798812087, |
|
"eval_loss": 2.0688838958740234, |
|
"eval_precision": 0.470814340268371, |
|
"eval_recall": 0.4225478100170218, |
|
"eval_runtime": 0.5781, |
|
"eval_samples_per_second": 627.962, |
|
"eval_steps_per_second": 79.576, |
|
"step": 22059 |
|
}, |
|
{ |
|
"epoch": 27.23, |
|
"learning_rate": 4.561600241196927e-05, |
|
"loss": 0.1615, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 4.546377982093819e-05, |
|
"loss": 0.2104, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 27.85, |
|
"learning_rate": 4.530922252306483e-05, |
|
"loss": 0.1858, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6584022038567493, |
|
"eval_f1": 0.44101738523719697, |
|
"eval_loss": 2.2427115440368652, |
|
"eval_precision": 0.5339727994900408, |
|
"eval_recall": 0.41865492881502736, |
|
"eval_runtime": 0.5762, |
|
"eval_samples_per_second": 630.011, |
|
"eval_steps_per_second": 79.836, |
|
"step": 22876 |
|
}, |
|
{ |
|
"epoch": 28.15, |
|
"learning_rate": 4.5152348151747534e-05, |
|
"loss": 0.1984, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 28.46, |
|
"learning_rate": 4.499317460473887e-05, |
|
"loss": 0.2388, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 28.76, |
|
"learning_rate": 4.483172004210372e-05, |
|
"loss": 0.1924, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6831955922865014, |
|
"eval_f1": 0.4384895896523804, |
|
"eval_loss": 2.0502805709838867, |
|
"eval_precision": 0.6932407169452727, |
|
"eval_recall": 0.39043344791497503, |
|
"eval_runtime": 0.5764, |
|
"eval_samples_per_second": 629.797, |
|
"eval_steps_per_second": 79.809, |
|
"step": 23693 |
|
}, |
|
{ |
|
"epoch": 29.07, |
|
"learning_rate": 4.46680028841474e-05, |
|
"loss": 0.1952, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 29.38, |
|
"learning_rate": 4.450204180931408e-05, |
|
"loss": 0.1551, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 4.433453290310271e-05, |
|
"loss": 0.1758, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 29.99, |
|
"learning_rate": 4.4164835737020996e-05, |
|
"loss": 0.151, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6804407713498623, |
|
"eval_f1": 0.4790753635178762, |
|
"eval_loss": 2.1431803703308105, |
|
"eval_precision": 0.5843463803065593, |
|
"eval_recall": 0.43816301719626843, |
|
"eval_runtime": 0.5767, |
|
"eval_samples_per_second": 629.492, |
|
"eval_steps_per_second": 79.77, |
|
"step": 24510 |
|
}, |
|
{ |
|
"epoch": 30.29, |
|
"learning_rate": 4.399227494450688e-05, |
|
"loss": 0.1475, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 30.6, |
|
"learning_rate": 4.381754732872496e-05, |
|
"loss": 0.1696, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 30.91, |
|
"learning_rate": 4.3640672824299424e-05, |
|
"loss": 0.1978, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.42594599441215986, |
|
"eval_loss": 2.2966179847717285, |
|
"eval_precision": 0.5307515065579581, |
|
"eval_recall": 0.41329913928436096, |
|
"eval_runtime": 0.5775, |
|
"eval_samples_per_second": 628.614, |
|
"eval_steps_per_second": 79.659, |
|
"step": 25327 |
|
}, |
|
{ |
|
"epoch": 31.21, |
|
"learning_rate": 4.346167161079236e-05, |
|
"loss": 0.167, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 31.52, |
|
"learning_rate": 4.328056411040151e-05, |
|
"loss": 0.1383, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 31.82, |
|
"learning_rate": 4.309737098563029e-05, |
|
"loss": 0.1922, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6831955922865014, |
|
"eval_f1": 0.485788264860417, |
|
"eval_loss": 2.0524113178253174, |
|
"eval_precision": 0.5313209433300384, |
|
"eval_recall": 0.4710779846863592, |
|
"eval_runtime": 0.577, |
|
"eval_samples_per_second": 629.091, |
|
"eval_steps_per_second": 79.719, |
|
"step": 26144 |
|
}, |
|
{ |
|
"epoch": 32.13, |
|
"learning_rate": 4.29121131369304e-05, |
|
"loss": 0.199, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 32.44, |
|
"learning_rate": 4.272481170031731e-05, |
|
"loss": 0.1964, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 32.74, |
|
"learning_rate": 4.253548804495887e-05, |
|
"loss": 0.1876, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.45277059579119083, |
|
"eval_loss": 2.3500843048095703, |
|
"eval_precision": 0.6191480424964332, |
|
"eval_recall": 0.4147965198457809, |
|
"eval_runtime": 0.5777, |
|
"eval_samples_per_second": 628.4, |
|
"eval_steps_per_second": 79.632, |
|
"step": 26961 |
|
}, |
|
{ |
|
"epoch": 33.05, |
|
"learning_rate": 4.234416377073727e-05, |
|
"loss": 0.1475, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 33.35, |
|
"learning_rate": 4.215086070578477e-05, |
|
"loss": 0.143, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 33.66, |
|
"learning_rate": 4.195560090399327e-05, |
|
"loss": 0.2021, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"learning_rate": 4.175840664249825e-05, |
|
"loss": 0.1649, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6584022038567493, |
|
"eval_f1": 0.4327576346908582, |
|
"eval_loss": 2.3630762100219727, |
|
"eval_precision": 0.46295462482075056, |
|
"eval_recall": 0.4282663662281889, |
|
"eval_runtime": 0.5765, |
|
"eval_samples_per_second": 629.661, |
|
"eval_steps_per_second": 79.792, |
|
"step": 27778 |
|
}, |
|
{ |
|
"epoch": 34.27, |
|
"learning_rate": 4.1559300419137124e-05, |
|
"loss": 0.1333, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 34.58, |
|
"learning_rate": 4.135830494988252e-05, |
|
"loss": 0.1386, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 34.88, |
|
"learning_rate": 4.1155443166250585e-05, |
|
"loss": 0.2109, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6749311294765841, |
|
"eval_f1": 0.4902477696595343, |
|
"eval_loss": 2.1397335529327393, |
|
"eval_precision": 0.5198444762520393, |
|
"eval_recall": 0.49638105463228616, |
|
"eval_runtime": 0.578, |
|
"eval_samples_per_second": 628.013, |
|
"eval_steps_per_second": 79.583, |
|
"step": 28595 |
|
}, |
|
{ |
|
"epoch": 35.19, |
|
"learning_rate": 4.095073821268473e-05, |
|
"loss": 0.1839, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 35.5, |
|
"learning_rate": 4.0744213443915125e-05, |
|
"loss": 0.1558, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 35.8, |
|
"learning_rate": 4.0536729253084094e-05, |
|
"loss": 0.127, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6694214876033058, |
|
"eval_f1": 0.4276520300779697, |
|
"eval_loss": 2.290356159210205, |
|
"eval_precision": 0.5064721567763604, |
|
"eval_recall": 0.40090887150616217, |
|
"eval_runtime": 0.5778, |
|
"eval_samples_per_second": 628.209, |
|
"eval_steps_per_second": 79.608, |
|
"step": 29412 |
|
}, |
|
{ |
|
"epoch": 36.11, |
|
"learning_rate": 4.032748663336774e-05, |
|
"loss": 0.2138, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 36.41, |
|
"learning_rate": 4.011565850261534e-05, |
|
"loss": 0.1587, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 36.72, |
|
"learning_rate": 3.990210583066674e-05, |
|
"loss": 0.1653, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6831955922865014, |
|
"eval_f1": 0.5118761031802896, |
|
"eval_loss": 2.097479820251465, |
|
"eval_precision": 0.558650914911419, |
|
"eval_recall": 0.4861388167114768, |
|
"eval_runtime": 0.5771, |
|
"eval_samples_per_second": 629.03, |
|
"eval_steps_per_second": 79.712, |
|
"step": 30229 |
|
}, |
|
{ |
|
"epoch": 37.03, |
|
"learning_rate": 3.968685298168575e-05, |
|
"loss": 0.1729, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 37.33, |
|
"learning_rate": 3.946992451380895e-05, |
|
"loss": 0.1092, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 37.64, |
|
"learning_rate": 3.9251345176343774e-05, |
|
"loss": 0.1322, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"learning_rate": 3.9031139906944916e-05, |
|
"loss": 0.1626, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.4688775335224489, |
|
"eval_loss": 2.4606480598449707, |
|
"eval_precision": 0.4988188909516061, |
|
"eval_recall": 0.45583700987026105, |
|
"eval_runtime": 0.5758, |
|
"eval_samples_per_second": 630.398, |
|
"eval_steps_per_second": 79.885, |
|
"step": 31046 |
|
}, |
|
{ |
|
"epoch": 38.25, |
|
"learning_rate": 3.880933382876915e-05, |
|
"loss": 0.1369, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 38.56, |
|
"learning_rate": 3.8585952247609145e-05, |
|
"loss": 0.1002, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 38.86, |
|
"learning_rate": 3.836102064900617e-05, |
|
"loss": 0.1454, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6914600550964187, |
|
"eval_f1": 0.5079144154287897, |
|
"eval_loss": 2.172344446182251, |
|
"eval_precision": 0.5805565750272889, |
|
"eval_recall": 0.4738757992144692, |
|
"eval_runtime": 0.5781, |
|
"eval_samples_per_second": 627.915, |
|
"eval_steps_per_second": 79.571, |
|
"step": 31863 |
|
}, |
|
{ |
|
"epoch": 39.17, |
|
"learning_rate": 3.8134564695342595e-05, |
|
"loss": 0.158, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 3.790661022291403e-05, |
|
"loss": 0.1078, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 39.78, |
|
"learning_rate": 3.767718323898163e-05, |
|
"loss": 0.1206, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6942148760330579, |
|
"eval_f1": 0.48536884689746623, |
|
"eval_loss": 2.1714024543762207, |
|
"eval_precision": 0.5719430108062781, |
|
"eval_recall": 0.4474171388272374, |
|
"eval_runtime": 0.5772, |
|
"eval_samples_per_second": 628.944, |
|
"eval_steps_per_second": 79.701, |
|
"step": 32680 |
|
}, |
|
{ |
|
"epoch": 40.09, |
|
"learning_rate": 3.744630991880502e-05, |
|
"loss": 0.1158, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 40.39, |
|
"learning_rate": 3.7214016602655875e-05, |
|
"loss": 0.115, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 40.7, |
|
"learning_rate": 3.698032979281283e-05, |
|
"loss": 0.1092, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6914600550964187, |
|
"eval_f1": 0.4980899463610581, |
|
"eval_loss": 2.2357773780822754, |
|
"eval_precision": 0.5844052283360602, |
|
"eval_recall": 0.46764869298736295, |
|
"eval_runtime": 0.5777, |
|
"eval_samples_per_second": 628.373, |
|
"eval_steps_per_second": 79.629, |
|
"step": 33497 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 3.6745276150537816e-05, |
|
"loss": 0.1262, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 41.31, |
|
"learning_rate": 3.650983070128024e-05, |
|
"loss": 0.1034, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 41.62, |
|
"learning_rate": 3.6272129196890194e-05, |
|
"loss": 0.0885, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 41.92, |
|
"learning_rate": 3.603410013431832e-05, |
|
"loss": 0.1156, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6584022038567493, |
|
"eval_f1": 0.4280488256791789, |
|
"eval_loss": 2.558166265487671, |
|
"eval_precision": 0.45057717568928324, |
|
"eval_recall": 0.416898799749785, |
|
"eval_runtime": 0.5768, |
|
"eval_samples_per_second": 629.389, |
|
"eval_steps_per_second": 79.757, |
|
"step": 34314 |
|
}, |
|
{ |
|
"epoch": 42.23, |
|
"learning_rate": 3.579385880846232e-05, |
|
"loss": 0.1169, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 42.53, |
|
"learning_rate": 3.5552386014312834e-05, |
|
"loss": 0.099, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 42.84, |
|
"learning_rate": 3.530970930143252e-05, |
|
"loss": 0.093, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.628099173553719, |
|
"eval_f1": 0.4313278010079715, |
|
"eval_loss": 2.627509355545044, |
|
"eval_precision": 0.5279577964214083, |
|
"eval_recall": 0.40927325616365023, |
|
"eval_runtime": 0.576, |
|
"eval_samples_per_second": 630.259, |
|
"eval_steps_per_second": 79.868, |
|
"step": 35131 |
|
}, |
|
{ |
|
"epoch": 43.15, |
|
"learning_rate": 3.5065856356738794e-05, |
|
"loss": 0.1584, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 43.45, |
|
"learning_rate": 3.482085500134504e-05, |
|
"loss": 0.0967, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 43.76, |
|
"learning_rate": 3.4574733187386486e-05, |
|
"loss": 0.1484, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6694214876033058, |
|
"eval_f1": 0.43577912693695875, |
|
"eval_loss": 2.5017268657684326, |
|
"eval_precision": 0.5655394346579834, |
|
"eval_recall": 0.3971485787064605, |
|
"eval_runtime": 0.5768, |
|
"eval_samples_per_second": 629.305, |
|
"eval_steps_per_second": 79.747, |
|
"step": 35948 |
|
}, |
|
{ |
|
"epoch": 44.06, |
|
"learning_rate": 3.432751899483116e-05, |
|
"loss": 0.1379, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 44.37, |
|
"learning_rate": 3.4079240628276246e-05, |
|
"loss": 0.0877, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 44.68, |
|
"learning_rate": 3.382992641373025e-05, |
|
"loss": 0.1466, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 44.98, |
|
"learning_rate": 3.357960479538127e-05, |
|
"loss": 0.1411, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6914600550964187, |
|
"eval_f1": 0.5234198940288918, |
|
"eval_loss": 2.3302204608917236, |
|
"eval_precision": 0.5942265966479041, |
|
"eval_recall": 0.5061625600726586, |
|
"eval_runtime": 0.5778, |
|
"eval_samples_per_second": 628.231, |
|
"eval_steps_per_second": 79.611, |
|
"step": 36765 |
|
}, |
|
{ |
|
"epoch": 45.29, |
|
"learning_rate": 3.332830433235184e-05, |
|
"loss": 0.0849, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 45.59, |
|
"learning_rate": 3.307605369544058e-05, |
|
"loss": 0.126, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 45.9, |
|
"learning_rate": 3.28228816638512e-05, |
|
"loss": 0.1003, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.696969696969697, |
|
"eval_f1": 0.5745871087976351, |
|
"eval_loss": 2.3890013694763184, |
|
"eval_precision": 0.6828113242342491, |
|
"eval_recall": 0.5128949833691213, |
|
"eval_runtime": 0.577, |
|
"eval_samples_per_second": 629.101, |
|
"eval_steps_per_second": 79.721, |
|
"step": 37582 |
|
}, |
|
{ |
|
"epoch": 46.21, |
|
"learning_rate": 3.256881712190906e-05, |
|
"loss": 0.11, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 46.51, |
|
"learning_rate": 3.231388905576575e-05, |
|
"loss": 0.1225, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 46.82, |
|
"learning_rate": 3.205915122367602e-05, |
|
"loss": 0.1245, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6804407713498623, |
|
"eval_f1": 0.5184040661055641, |
|
"eval_loss": 2.339553117752075, |
|
"eval_precision": 0.6566468598124048, |
|
"eval_recall": 0.48311657554268395, |
|
"eval_runtime": 0.5772, |
|
"eval_samples_per_second": 628.951, |
|
"eval_steps_per_second": 79.702, |
|
"step": 38399 |
|
}, |
|
{ |
|
"epoch": 47.12, |
|
"learning_rate": 3.180258662113338e-05, |
|
"loss": 0.1008, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 47.43, |
|
"learning_rate": 3.154524591341034e-05, |
|
"loss": 0.121, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 47.74, |
|
"learning_rate": 3.128715846043534e-05, |
|
"loss": 0.0998, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.696969696969697, |
|
"eval_f1": 0.5459744525861538, |
|
"eval_loss": 2.4267241954803467, |
|
"eval_precision": 0.6807294155120241, |
|
"eval_recall": 0.4855936141034663, |
|
"eval_runtime": 0.5758, |
|
"eval_samples_per_second": 630.432, |
|
"eval_steps_per_second": 79.889, |
|
"step": 39216 |
|
}, |
|
{ |
|
"epoch": 48.04, |
|
"learning_rate": 3.102835370733277e-05, |
|
"loss": 0.0962, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 48.35, |
|
"learning_rate": 3.07688611810636e-05, |
|
"loss": 0.0778, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 48.65, |
|
"learning_rate": 3.0508710487056635e-05, |
|
"loss": 0.1145, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 48.96, |
|
"learning_rate": 3.0247931305830845e-05, |
|
"loss": 0.1048, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.6749311294765841, |
|
"eval_f1": 0.536908729765805, |
|
"eval_loss": 2.3678715229034424, |
|
"eval_precision": 0.6530412239759585, |
|
"eval_recall": 0.48379632135789763, |
|
"eval_runtime": 0.578, |
|
"eval_samples_per_second": 628.037, |
|
"eval_steps_per_second": 79.586, |
|
"step": 40033 |
|
}, |
|
{ |
|
"epoch": 49.27, |
|
"learning_rate": 2.998655338960914e-05, |
|
"loss": 0.0751, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 49.57, |
|
"learning_rate": 2.9724606558923933e-05, |
|
"loss": 0.107, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 49.88, |
|
"learning_rate": 2.9462120699214922e-05, |
|
"loss": 0.0605, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.5723258179466014, |
|
"eval_loss": 2.3588879108428955, |
|
"eval_precision": 0.6557361185869716, |
|
"eval_recall": 0.5364202334337802, |
|
"eval_runtime": 0.5756, |
|
"eval_samples_per_second": 630.622, |
|
"eval_steps_per_second": 79.913, |
|
"step": 40850 |
|
}, |
|
{ |
|
"epoch": 50.18, |
|
"learning_rate": 2.9199125757419482e-05, |
|
"loss": 0.0718, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 50.49, |
|
"learning_rate": 2.8935651738556013e-05, |
|
"loss": 0.0661, |
|
"step": 41250 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"learning_rate": 2.8671728702300687e-05, |
|
"loss": 0.101, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7024793388429752, |
|
"eval_f1": 0.5646342747625367, |
|
"eval_loss": 2.261863946914673, |
|
"eval_precision": 0.6370711351914359, |
|
"eval_recall": 0.5380352030290453, |
|
"eval_runtime": 0.5759, |
|
"eval_samples_per_second": 630.369, |
|
"eval_steps_per_second": 79.881, |
|
"step": 41667 |
|
}, |
|
{ |
|
"epoch": 51.1, |
|
"learning_rate": 2.840738675955793e-05, |
|
"loss": 0.1039, |
|
"step": 41750 |
|
}, |
|
{ |
|
"epoch": 51.41, |
|
"learning_rate": 2.814371572615747e-05, |
|
"loss": 0.0769, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 51.71, |
|
"learning_rate": 2.787862786484319e-05, |
|
"loss": 0.0858, |
|
"step": 42250 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.5195773061626721, |
|
"eval_loss": 2.5068295001983643, |
|
"eval_precision": 0.6063341583445594, |
|
"eval_recall": 0.4876037550298634, |
|
"eval_runtime": 0.5762, |
|
"eval_samples_per_second": 629.983, |
|
"eval_steps_per_second": 79.833, |
|
"step": 42484 |
|
}, |
|
{ |
|
"epoch": 52.02, |
|
"learning_rate": 2.761321158169134e-05, |
|
"loss": 0.0844, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 52.33, |
|
"learning_rate": 2.734749715797047e-05, |
|
"loss": 0.069, |
|
"step": 42750 |
|
}, |
|
{ |
|
"epoch": 52.63, |
|
"learning_rate": 2.7081514908963913e-05, |
|
"loss": 0.0784, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 52.94, |
|
"learning_rate": 2.681529518051109e-05, |
|
"loss": 0.0795, |
|
"step": 43250 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.4958589687437866, |
|
"eval_loss": 2.4998273849487305, |
|
"eval_precision": 0.6786654073384627, |
|
"eval_recall": 0.45129423122033957, |
|
"eval_runtime": 0.5744, |
|
"eval_samples_per_second": 631.991, |
|
"eval_steps_per_second": 80.087, |
|
"step": 43301 |
|
}, |
|
{ |
|
"epoch": 53.24, |
|
"learning_rate": 2.6548868345545402e-05, |
|
"loss": 0.072, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 53.55, |
|
"learning_rate": 2.628226480062897e-05, |
|
"loss": 0.0401, |
|
"step": 43750 |
|
}, |
|
{ |
|
"epoch": 53.86, |
|
"learning_rate": 2.6015514962484717e-05, |
|
"loss": 0.0674, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6639118457300276, |
|
"eval_f1": 0.5109508473836721, |
|
"eval_loss": 2.704511880874634, |
|
"eval_precision": 0.6101414588534028, |
|
"eval_recall": 0.47100482987182496, |
|
"eval_runtime": 0.5772, |
|
"eval_samples_per_second": 628.872, |
|
"eval_steps_per_second": 79.692, |
|
"step": 44118 |
|
}, |
|
{ |
|
"epoch": 54.16, |
|
"learning_rate": 2.574971691776212e-05, |
|
"loss": 0.0601, |
|
"step": 44250 |
|
}, |
|
{ |
|
"epoch": 54.47, |
|
"learning_rate": 2.5482766087609973e-05, |
|
"loss": 0.0502, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 54.77, |
|
"learning_rate": 2.5215760178811658e-05, |
|
"loss": 0.1174, |
|
"step": 44750 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.516165985819526, |
|
"eval_loss": 2.3755171298980713, |
|
"eval_precision": 0.5969723691945914, |
|
"eval_recall": 0.4715294694358734, |
|
"eval_runtime": 0.5774, |
|
"eval_samples_per_second": 628.713, |
|
"eval_steps_per_second": 79.672, |
|
"step": 44935 |
|
}, |
|
{ |
|
"epoch": 55.08, |
|
"learning_rate": 2.4948729653995663e-05, |
|
"loss": 0.0826, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 55.39, |
|
"learning_rate": 2.4681704978598928e-05, |
|
"loss": 0.0422, |
|
"step": 45250 |
|
}, |
|
{ |
|
"epoch": 55.69, |
|
"learning_rate": 2.4414716617390998e-05, |
|
"loss": 0.059, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 2.414779503099838e-05, |
|
"loss": 0.0886, |
|
"step": 45750 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6804407713498623, |
|
"eval_f1": 0.47301725201698097, |
|
"eval_loss": 2.507392406463623, |
|
"eval_precision": 0.4981856952904839, |
|
"eval_recall": 0.4597725584486668, |
|
"eval_runtime": 0.5761, |
|
"eval_samples_per_second": 630.088, |
|
"eval_steps_per_second": 79.846, |
|
"step": 45752 |
|
}, |
|
{ |
|
"epoch": 56.3, |
|
"learning_rate": 2.388097067242925e-05, |
|
"loss": 0.0528, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 56.61, |
|
"learning_rate": 2.3614273983599068e-05, |
|
"loss": 0.0643, |
|
"step": 46250 |
|
}, |
|
{ |
|
"epoch": 56.92, |
|
"learning_rate": 2.334773539185752e-05, |
|
"loss": 0.058, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.6804407713498623, |
|
"eval_f1": 0.4807142977366666, |
|
"eval_loss": 2.5247209072113037, |
|
"eval_precision": 0.5200869236583522, |
|
"eval_recall": 0.45992668729737696, |
|
"eval_runtime": 0.5758, |
|
"eval_samples_per_second": 630.387, |
|
"eval_steps_per_second": 79.884, |
|
"step": 46569 |
|
}, |
|
{ |
|
"epoch": 57.22, |
|
"learning_rate": 2.308138530651701e-05, |
|
"loss": 0.0823, |
|
"step": 46750 |
|
}, |
|
{ |
|
"epoch": 57.53, |
|
"learning_rate": 2.2815254115383325e-05, |
|
"loss": 0.0428, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 57.83, |
|
"learning_rate": 2.254937218128868e-05, |
|
"loss": 0.0558, |
|
"step": 47250 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.6914600550964187, |
|
"eval_f1": 0.4594613590535599, |
|
"eval_loss": 2.47005033493042, |
|
"eval_precision": 0.6139090044619999, |
|
"eval_recall": 0.4228677965439049, |
|
"eval_runtime": 0.5759, |
|
"eval_samples_per_second": 630.356, |
|
"eval_steps_per_second": 79.88, |
|
"step": 47386 |
|
}, |
|
{ |
|
"epoch": 58.14, |
|
"learning_rate": 2.2283769838627582e-05, |
|
"loss": 0.0931, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 58.45, |
|
"learning_rate": 2.2018477389896086e-05, |
|
"loss": 0.0508, |
|
"step": 47750 |
|
}, |
|
{ |
|
"epoch": 58.75, |
|
"learning_rate": 2.1753525102234484e-05, |
|
"loss": 0.0452, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.6942148760330579, |
|
"eval_f1": 0.46439631667557835, |
|
"eval_loss": 2.541719675064087, |
|
"eval_precision": 0.5814331265699046, |
|
"eval_recall": 0.43331322139819684, |
|
"eval_runtime": 0.5754, |
|
"eval_samples_per_second": 630.853, |
|
"eval_steps_per_second": 79.943, |
|
"step": 48203 |
|
}, |
|
{ |
|
"epoch": 59.06, |
|
"learning_rate": 2.1488943203974225e-05, |
|
"loss": 0.0487, |
|
"step": 48250 |
|
}, |
|
{ |
|
"epoch": 59.36, |
|
"learning_rate": 2.122581776857167e-05, |
|
"loss": 0.0777, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 59.67, |
|
"learning_rate": 2.0963119490709552e-05, |
|
"loss": 0.0656, |
|
"step": 48750 |
|
}, |
|
{ |
|
"epoch": 59.98, |
|
"learning_rate": 2.0699825885128935e-05, |
|
"loss": 0.0589, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.6721763085399449, |
|
"eval_f1": 0.4951052024505615, |
|
"eval_loss": 2.631096363067627, |
|
"eval_precision": 0.5573207158969782, |
|
"eval_recall": 0.4686150056839712, |
|
"eval_runtime": 0.5753, |
|
"eval_samples_per_second": 630.965, |
|
"eval_steps_per_second": 79.957, |
|
"step": 49020 |
|
}, |
|
{ |
|
"epoch": 60.28, |
|
"learning_rate": 2.0437022885194815e-05, |
|
"loss": 0.0345, |
|
"step": 49250 |
|
}, |
|
{ |
|
"epoch": 60.59, |
|
"learning_rate": 2.017474047402702e-05, |
|
"loss": 0.0656, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 60.89, |
|
"learning_rate": 1.991300857535151e-05, |
|
"loss": 0.0993, |
|
"step": 49750 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_f1": 0.4534585450214669, |
|
"eval_loss": 2.720975160598755, |
|
"eval_precision": 0.5187203506875638, |
|
"eval_recall": 0.423817756545589, |
|
"eval_runtime": 0.5751, |
|
"eval_samples_per_second": 631.214, |
|
"eval_steps_per_second": 79.989, |
|
"step": 49837 |
|
}, |
|
{ |
|
"epoch": 61.2, |
|
"learning_rate": 1.9651857050086467e-05, |
|
"loss": 0.0831, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 61.51, |
|
"learning_rate": 1.9391315692935428e-05, |
|
"loss": 0.0378, |
|
"step": 50250 |
|
}, |
|
{ |
|
"epoch": 61.81, |
|
"learning_rate": 1.9131414228988008e-05, |
|
"loss": 0.0795, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.6942148760330579, |
|
"eval_f1": 0.5159346012569708, |
|
"eval_loss": 2.37697696685791, |
|
"eval_precision": 0.6010404846231928, |
|
"eval_recall": 0.4778067239274136, |
|
"eval_runtime": 0.5752, |
|
"eval_samples_per_second": 631.136, |
|
"eval_steps_per_second": 79.979, |
|
"step": 50654 |
|
}, |
|
{ |
|
"epoch": 62.12, |
|
"learning_rate": 1.8872182310328605e-05, |
|
"loss": 0.0624, |
|
"step": 50750 |
|
}, |
|
{ |
|
"epoch": 62.42, |
|
"learning_rate": 1.861364951265337e-05, |
|
"loss": 0.0685, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 62.73, |
|
"learning_rate": 1.835584533189595e-05, |
|
"loss": 0.0438, |
|
"step": 51250 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.6942148760330579, |
|
"eval_f1": 0.5006627978013612, |
|
"eval_loss": 2.5155351161956787, |
|
"eval_precision": 0.5974857177798354, |
|
"eval_recall": 0.4612218207353675, |
|
"eval_runtime": 0.5756, |
|
"eval_samples_per_second": 630.635, |
|
"eval_steps_per_second": 79.915, |
|
"step": 51471 |
|
}, |
|
{ |
|
"epoch": 63.04, |
|
"learning_rate": 1.8098799180862294e-05, |
|
"loss": 0.0512, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 63.34, |
|
"learning_rate": 1.784254038587494e-05, |
|
"loss": 0.0443, |
|
"step": 51750 |
|
}, |
|
{ |
|
"epoch": 63.65, |
|
"learning_rate": 1.758709818342722e-05, |
|
"loss": 0.0331, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 63.95, |
|
"learning_rate": 1.7332501716847632e-05, |
|
"loss": 0.0432, |
|
"step": 52250 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4691190044905215, |
|
"eval_loss": 2.5460140705108643, |
|
"eval_precision": 0.533927553927554, |
|
"eval_recall": 0.44402096603451285, |
|
"eval_runtime": 0.5748, |
|
"eval_samples_per_second": 631.506, |
|
"eval_steps_per_second": 80.026, |
|
"step": 52288 |
|
}, |
|
{ |
|
"epoch": 64.26, |
|
"learning_rate": 1.7078780032974923e-05, |
|
"loss": 0.0442, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 64.57, |
|
"learning_rate": 1.682596207884414e-05, |
|
"loss": 0.0281, |
|
"step": 52750 |
|
}, |
|
{ |
|
"epoch": 64.87, |
|
"learning_rate": 1.657608800518716e-05, |
|
"loss": 0.0609, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.7052341597796143, |
|
"eval_f1": 0.5783901018472066, |
|
"eval_loss": 2.4328320026397705, |
|
"eval_precision": 0.6903590657995944, |
|
"eval_recall": 0.5364771859229988, |
|
"eval_runtime": 0.5756, |
|
"eval_samples_per_second": 630.632, |
|
"eval_steps_per_second": 79.915, |
|
"step": 53105 |
|
}, |
|
{ |
|
"epoch": 65.18, |
|
"learning_rate": 1.6325156131696083e-05, |
|
"loss": 0.0475, |
|
"step": 53250 |
|
}, |
|
{ |
|
"epoch": 65.48, |
|
"learning_rate": 1.6075213968684297e-05, |
|
"loss": 0.0447, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 65.79, |
|
"learning_rate": 1.5826290031982382e-05, |
|
"loss": 0.0426, |
|
"step": 53750 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.7107438016528925, |
|
"eval_f1": 0.5337400275204507, |
|
"eval_loss": 2.376704216003418, |
|
"eval_precision": 0.6895915165326075, |
|
"eval_recall": 0.47468881009521396, |
|
"eval_runtime": 0.5754, |
|
"eval_samples_per_second": 630.914, |
|
"eval_steps_per_second": 79.95, |
|
"step": 53922 |
|
}, |
|
{ |
|
"epoch": 66.1, |
|
"learning_rate": 1.5578412721251766e-05, |
|
"loss": 0.0541, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"learning_rate": 1.533161031674459e-05, |
|
"loss": 0.0469, |
|
"step": 54250 |
|
}, |
|
{ |
|
"epoch": 66.71, |
|
"learning_rate": 1.5085910976077283e-05, |
|
"loss": 0.0194, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.7079889807162535, |
|
"eval_f1": 0.5368473703383648, |
|
"eval_loss": 2.5384085178375244, |
|
"eval_precision": 0.6938002473716759, |
|
"eval_recall": 0.4738072682653963, |
|
"eval_runtime": 0.575, |
|
"eval_samples_per_second": 631.275, |
|
"eval_steps_per_second": 79.996, |
|
"step": 54739 |
|
}, |
|
{ |
|
"epoch": 67.01, |
|
"learning_rate": 1.4841342731017988e-05, |
|
"loss": 0.0356, |
|
"step": 54750 |
|
}, |
|
{ |
|
"epoch": 67.32, |
|
"learning_rate": 1.4598904775700689e-05, |
|
"loss": 0.0421, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 67.63, |
|
"learning_rate": 1.4356677495577313e-05, |
|
"loss": 0.0423, |
|
"step": 55250 |
|
}, |
|
{ |
|
"epoch": 67.93, |
|
"learning_rate": 1.4115664509103169e-05, |
|
"loss": 0.0557, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7162534435261708, |
|
"eval_f1": 0.5433935912807646, |
|
"eval_loss": 2.393681049346924, |
|
"eval_precision": 0.6931352645638361, |
|
"eval_recall": 0.4979390717383328, |
|
"eval_runtime": 0.575, |
|
"eval_samples_per_second": 631.299, |
|
"eval_steps_per_second": 79.999, |
|
"step": 55556 |
|
}, |
|
{ |
|
"epoch": 68.24, |
|
"learning_rate": 1.3875893313381589e-05, |
|
"loss": 0.0432, |
|
"step": 55750 |
|
}, |
|
{ |
|
"epoch": 68.54, |
|
"learning_rate": 1.3637391263840368e-05, |
|
"loss": 0.0429, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 68.85, |
|
"learning_rate": 1.3400185571110769e-05, |
|
"loss": 0.0466, |
|
"step": 56250 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.7107438016528925, |
|
"eval_f1": 0.5718625572894164, |
|
"eval_loss": 2.403398275375366, |
|
"eval_precision": 0.6765347833670815, |
|
"eval_recall": 0.5215229283579037, |
|
"eval_runtime": 0.5744, |
|
"eval_samples_per_second": 631.944, |
|
"eval_steps_per_second": 80.081, |
|
"step": 56373 |
|
}, |
|
{ |
|
"epoch": 69.16, |
|
"learning_rate": 1.316430329792307e-05, |
|
"loss": 0.0465, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 69.46, |
|
"learning_rate": 1.2929771356018988e-05, |
|
"loss": 0.0548, |
|
"step": 56750 |
|
}, |
|
{ |
|
"epoch": 69.77, |
|
"learning_rate": 1.2696616503081343e-05, |
|
"loss": 0.0517, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.7052341597796143, |
|
"eval_f1": 0.5117271310290573, |
|
"eval_loss": 2.4453024864196777, |
|
"eval_precision": 0.7065175565175564, |
|
"eval_recall": 0.46373618855145954, |
|
"eval_runtime": 0.5763, |
|
"eval_samples_per_second": 629.826, |
|
"eval_steps_per_second": 79.813, |
|
"step": 57190 |
|
}, |
|
{ |
|
"epoch": 70.07, |
|
"learning_rate": 1.2464865339681253e-05, |
|
"loss": 0.0226, |
|
"step": 57250 |
|
}, |
|
{ |
|
"epoch": 70.38, |
|
"learning_rate": 1.2234544306243286e-05, |
|
"loss": 0.0292, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 70.69, |
|
"learning_rate": 1.2005679680028897e-05, |
|
"loss": 0.0368, |
|
"step": 57750 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"learning_rate": 1.177920411295463e-05, |
|
"loss": 0.0437, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.4571751135533447, |
|
"eval_loss": 2.6550002098083496, |
|
"eval_precision": 0.6424798334634401, |
|
"eval_recall": 0.41016626367611586, |
|
"eval_runtime": 0.5744, |
|
"eval_samples_per_second": 632.012, |
|
"eval_steps_per_second": 80.09, |
|
"step": 58007 |
|
}, |
|
{ |
|
"epoch": 71.3, |
|
"learning_rate": 1.1553324380057995e-05, |
|
"loss": 0.037, |
|
"step": 58250 |
|
}, |
|
{ |
|
"epoch": 71.6, |
|
"learning_rate": 1.1328978774573311e-05, |
|
"loss": 0.0361, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 71.91, |
|
"learning_rate": 1.1106192892027184e-05, |
|
"loss": 0.0451, |
|
"step": 58750 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.6942148760330579, |
|
"eval_f1": 0.476209725136688, |
|
"eval_loss": 2.6152384281158447, |
|
"eval_precision": 0.6680127132820342, |
|
"eval_recall": 0.4248004219371215, |
|
"eval_runtime": 0.5754, |
|
"eval_samples_per_second": 630.887, |
|
"eval_steps_per_second": 79.947, |
|
"step": 58824 |
|
}, |
|
{ |
|
"epoch": 72.22, |
|
"learning_rate": 1.0884992149997797e-05, |
|
"loss": 0.0112, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 72.52, |
|
"learning_rate": 1.066540178521517e-05, |
|
"loss": 0.043, |
|
"step": 59250 |
|
}, |
|
{ |
|
"epoch": 72.83, |
|
"learning_rate": 1.04474468506818e-05, |
|
"loss": 0.0294, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.5392699243016044, |
|
"eval_loss": 2.5824713706970215, |
|
"eval_precision": 0.6700918129489558, |
|
"eval_recall": 0.4843369001004469, |
|
"eval_runtime": 0.5751, |
|
"eval_samples_per_second": 631.242, |
|
"eval_steps_per_second": 79.992, |
|
"step": 59641 |
|
}, |
|
{ |
|
"epoch": 73.13, |
|
"learning_rate": 1.0231152212814419e-05, |
|
"loss": 0.0468, |
|
"step": 59750 |
|
}, |
|
{ |
|
"epoch": 73.44, |
|
"learning_rate": 1.0016542548606983e-05, |
|
"loss": 0.0316, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 73.75, |
|
"learning_rate": 9.803642342815278e-06, |
|
"loss": 0.0429, |
|
"step": 60250 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.696969696969697, |
|
"eval_f1": 0.48452204181281855, |
|
"eval_loss": 2.451073408126831, |
|
"eval_precision": 0.6746470741527906, |
|
"eval_recall": 0.4329222604161029, |
|
"eval_runtime": 0.5756, |
|
"eval_samples_per_second": 630.696, |
|
"eval_steps_per_second": 79.923, |
|
"step": 60458 |
|
}, |
|
{ |
|
"epoch": 74.05, |
|
"learning_rate": 9.592475885163487e-06, |
|
"loss": 0.0236, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 74.36, |
|
"learning_rate": 9.383067267572908e-06, |
|
"loss": 0.0221, |
|
"step": 60750 |
|
}, |
|
{ |
|
"epoch": 74.66, |
|
"learning_rate": 9.17544038141342e-06, |
|
"loss": 0.0313, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 74.97, |
|
"learning_rate": 8.969618914777637e-06, |
|
"loss": 0.0389, |
|
"step": 61250 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.6749311294765841, |
|
"eval_f1": 0.47978279633622645, |
|
"eval_loss": 2.7327232360839844, |
|
"eval_precision": 0.5541808071284741, |
|
"eval_recall": 0.44874471450826137, |
|
"eval_runtime": 0.5752, |
|
"eval_samples_per_second": 631.071, |
|
"eval_steps_per_second": 79.97, |
|
"step": 61275 |
|
}, |
|
{ |
|
"epoch": 75.28, |
|
"learning_rate": 8.765626349778362e-06, |
|
"loss": 0.0227, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 75.58, |
|
"learning_rate": 8.563485959869575e-06, |
|
"loss": 0.0303, |
|
"step": 61750 |
|
}, |
|
{ |
|
"epoch": 75.89, |
|
"learning_rate": 8.363220807191058e-06, |
|
"loss": 0.0305, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7024793388429752, |
|
"eval_f1": 0.5534820298880379, |
|
"eval_loss": 2.4534895420074463, |
|
"eval_precision": 0.6256500777723188, |
|
"eval_recall": 0.5284146306621677, |
|
"eval_runtime": 0.5743, |
|
"eval_samples_per_second": 632.097, |
|
"eval_steps_per_second": 80.1, |
|
"step": 62092 |
|
}, |
|
{ |
|
"epoch": 76.19, |
|
"learning_rate": 8.164853739937368e-06, |
|
"loss": 0.034, |
|
"step": 62250 |
|
}, |
|
{ |
|
"epoch": 76.5, |
|
"learning_rate": 7.968407389751003e-06, |
|
"loss": 0.034, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 76.81, |
|
"learning_rate": 7.773904169140392e-06, |
|
"loss": 0.023, |
|
"step": 62750 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.7024793388429752, |
|
"eval_f1": 0.4687311706012172, |
|
"eval_loss": 2.6240921020507812, |
|
"eval_precision": 0.5393879300200679, |
|
"eval_recall": 0.4422954747770019, |
|
"eval_runtime": 0.5739, |
|
"eval_samples_per_second": 632.494, |
|
"eval_steps_per_second": 80.151, |
|
"step": 62909 |
|
}, |
|
{ |
|
"epoch": 77.11, |
|
"learning_rate": 7.581366268922896e-06, |
|
"loss": 0.0132, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 77.42, |
|
"learning_rate": 7.3908156556929705e-06, |
|
"loss": 0.0201, |
|
"step": 63250 |
|
}, |
|
{ |
|
"epoch": 77.72, |
|
"learning_rate": 7.20227406931612e-06, |
|
"loss": 0.0305, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.6942148760330579, |
|
"eval_f1": 0.4474972103847281, |
|
"eval_loss": 2.737638235092163, |
|
"eval_precision": 0.539260468004995, |
|
"eval_recall": 0.407899366643209, |
|
"eval_runtime": 0.5754, |
|
"eval_samples_per_second": 630.913, |
|
"eval_steps_per_second": 79.95, |
|
"step": 63726 |
|
}, |
|
{ |
|
"epoch": 78.03, |
|
"learning_rate": 7.01576302044851e-06, |
|
"loss": 0.0258, |
|
"step": 63750 |
|
}, |
|
{ |
|
"epoch": 78.34, |
|
"learning_rate": 6.831303788082866e-06, |
|
"loss": 0.0212, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 78.64, |
|
"learning_rate": 6.6489174171207504e-06, |
|
"loss": 0.0178, |
|
"step": 64250 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"learning_rate": 6.469341688844232e-06, |
|
"loss": 0.0278, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.7052341597796143, |
|
"eval_f1": 0.5369051843353192, |
|
"eval_loss": 2.641106605529785, |
|
"eval_precision": 0.6364117161569525, |
|
"eval_recall": 0.48457279242377765, |
|
"eval_runtime": 0.5738, |
|
"eval_samples_per_second": 632.582, |
|
"eval_steps_per_second": 80.162, |
|
"step": 64543 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"learning_rate": 6.291154729438112e-06, |
|
"loss": 0.0211, |
|
"step": 64750 |
|
}, |
|
{ |
|
"epoch": 79.56, |
|
"learning_rate": 6.115102256888608e-06, |
|
"loss": 0.027, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 79.87, |
|
"learning_rate": 5.9412043569724075e-06, |
|
"loss": 0.0245, |
|
"step": 65250 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.5139284663519239, |
|
"eval_loss": 2.8429274559020996, |
|
"eval_precision": 0.6295462275834203, |
|
"eval_recall": 0.46902355991025946, |
|
"eval_runtime": 0.5746, |
|
"eval_samples_per_second": 631.731, |
|
"eval_steps_per_second": 80.054, |
|
"step": 65360 |
|
}, |
|
{ |
|
"epoch": 80.17, |
|
"learning_rate": 5.769480869651678e-06, |
|
"loss": 0.0177, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 80.48, |
|
"learning_rate": 5.599951386810407e-06, |
|
"loss": 0.0229, |
|
"step": 65750 |
|
}, |
|
{ |
|
"epoch": 80.78, |
|
"learning_rate": 5.432635250019285e-06, |
|
"loss": 0.0227, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.7245179063360881, |
|
"eval_f1": 0.5777058546571006, |
|
"eval_loss": 2.6588237285614014, |
|
"eval_precision": 0.6858633718409165, |
|
"eval_recall": 0.5245856565437846, |
|
"eval_runtime": 0.5738, |
|
"eval_samples_per_second": 632.57, |
|
"eval_steps_per_second": 80.16, |
|
"step": 66177 |
|
}, |
|
{ |
|
"epoch": 81.09, |
|
"learning_rate": 5.26755154832895e-06, |
|
"loss": 0.0261, |
|
"step": 66250 |
|
}, |
|
{ |
|
"epoch": 81.4, |
|
"learning_rate": 5.1047191160921495e-06, |
|
"loss": 0.0191, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 81.7, |
|
"learning_rate": 4.9441565308149724e-06, |
|
"loss": 0.0226, |
|
"step": 66750 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.7024793388429752, |
|
"eval_f1": 0.5153196455189113, |
|
"eval_loss": 2.683469533920288, |
|
"eval_precision": 0.5957972186543615, |
|
"eval_recall": 0.47307357735313404, |
|
"eval_runtime": 0.5753, |
|
"eval_samples_per_second": 630.959, |
|
"eval_steps_per_second": 79.956, |
|
"step": 66994 |
|
}, |
|
{ |
|
"epoch": 82.01, |
|
"learning_rate": 4.785882111037252e-06, |
|
"loss": 0.0164, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 82.31, |
|
"learning_rate": 4.629913914242723e-06, |
|
"loss": 0.0207, |
|
"step": 67250 |
|
}, |
|
{ |
|
"epoch": 82.62, |
|
"learning_rate": 4.4762697347987634e-06, |
|
"loss": 0.0211, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 82.93, |
|
"learning_rate": 4.324967101926272e-06, |
|
"loss": 0.0224, |
|
"step": 67750 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.699724517906336, |
|
"eval_f1": 0.5510071098243216, |
|
"eval_loss": 2.7483408451080322, |
|
"eval_precision": 0.6299525870954442, |
|
"eval_recall": 0.5086697101475426, |
|
"eval_runtime": 0.5745, |
|
"eval_samples_per_second": 631.809, |
|
"eval_steps_per_second": 80.064, |
|
"step": 67811 |
|
}, |
|
{ |
|
"epoch": 83.23, |
|
"learning_rate": 4.176023277699789e-06, |
|
"loss": 0.027, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 83.54, |
|
"learning_rate": 4.029455255077999e-06, |
|
"loss": 0.0154, |
|
"step": 68250 |
|
}, |
|
{ |
|
"epoch": 83.84, |
|
"learning_rate": 3.8852797559650935e-06, |
|
"loss": 0.0195, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.699724517906336, |
|
"eval_f1": 0.52634249226563, |
|
"eval_loss": 2.7812142372131348, |
|
"eval_precision": 0.5949473728885494, |
|
"eval_recall": 0.49608820981973695, |
|
"eval_runtime": 0.5756, |
|
"eval_samples_per_second": 630.699, |
|
"eval_steps_per_second": 79.923, |
|
"step": 68628 |
|
}, |
|
{ |
|
"epoch": 84.15, |
|
"learning_rate": 3.7440754752084e-06, |
|
"loss": 0.012, |
|
"step": 68750 |
|
}, |
|
{ |
|
"epoch": 84.46, |
|
"learning_rate": 3.604724362660877e-06, |
|
"loss": 0.0262, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 84.76, |
|
"learning_rate": 3.467814231049432e-06, |
|
"loss": 0.0106, |
|
"step": 69250 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.6942148760330579, |
|
"eval_f1": 0.5085355239316488, |
|
"eval_loss": 2.8171160221099854, |
|
"eval_precision": 0.6049143980994077, |
|
"eval_recall": 0.4679249219581732, |
|
"eval_runtime": 0.5729, |
|
"eval_samples_per_second": 633.612, |
|
"eval_steps_per_second": 80.292, |
|
"step": 69445 |
|
}, |
|
{ |
|
"epoch": 85.07, |
|
"learning_rate": 3.33336070041218e-06, |
|
"loss": 0.0258, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 85.37, |
|
"learning_rate": 3.2013791105143466e-06, |
|
"loss": 0.0118, |
|
"step": 69750 |
|
}, |
|
{ |
|
"epoch": 85.68, |
|
"learning_rate": 3.071884519098131e-06, |
|
"loss": 0.0217, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 85.99, |
|
"learning_rate": 2.9448917001647703e-06, |
|
"loss": 0.0155, |
|
"step": 70250 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.4962240507440637, |
|
"eval_loss": 2.8894941806793213, |
|
"eval_precision": 0.5999622071050642, |
|
"eval_recall": 0.45233290929103737, |
|
"eval_runtime": 0.575, |
|
"eval_samples_per_second": 631.309, |
|
"eval_steps_per_second": 80.001, |
|
"step": 70262 |
|
}, |
|
{ |
|
"epoch": 86.29, |
|
"learning_rate": 2.820415142289015e-06, |
|
"loss": 0.0196, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 86.6, |
|
"learning_rate": 2.698951772130373e-06, |
|
"loss": 0.0064, |
|
"step": 70750 |
|
}, |
|
{ |
|
"epoch": 86.9, |
|
"learning_rate": 2.5795398473242555e-06, |
|
"loss": 0.0111, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4869283447078967, |
|
"eval_loss": 2.887157678604126, |
|
"eval_precision": 0.596748772605851, |
|
"eval_recall": 0.44054806714905237, |
|
"eval_runtime": 0.575, |
|
"eval_samples_per_second": 631.314, |
|
"eval_steps_per_second": 80.001, |
|
"step": 71079 |
|
}, |
|
{ |
|
"epoch": 87.21, |
|
"learning_rate": 2.462685866465117e-06, |
|
"loss": 0.0174, |
|
"step": 71250 |
|
}, |
|
{ |
|
"epoch": 87.52, |
|
"learning_rate": 2.3484031613905387e-06, |
|
"loss": 0.0105, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 87.82, |
|
"learning_rate": 2.2367047705819572e-06, |
|
"loss": 0.0097, |
|
"step": 71750 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4890889302500872, |
|
"eval_loss": 2.9936180114746094, |
|
"eval_precision": 0.5928653178112443, |
|
"eval_recall": 0.44590630018585686, |
|
"eval_runtime": 0.5751, |
|
"eval_samples_per_second": 631.226, |
|
"eval_steps_per_second": 79.99, |
|
"step": 71896 |
|
}, |
|
{ |
|
"epoch": 88.13, |
|
"learning_rate": 2.127603437677139e-06, |
|
"loss": 0.0083, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 88.43, |
|
"learning_rate": 2.0211116100162435e-06, |
|
"loss": 0.0124, |
|
"step": 72250 |
|
}, |
|
{ |
|
"epoch": 88.74, |
|
"learning_rate": 1.9172414372217146e-06, |
|
"loss": 0.0086, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.6831955922865014, |
|
"eval_f1": 0.49100657070581877, |
|
"eval_loss": 3.0106709003448486, |
|
"eval_precision": 0.5958884973599873, |
|
"eval_recall": 0.44583543850785234, |
|
"eval_runtime": 0.5744, |
|
"eval_samples_per_second": 631.965, |
|
"eval_steps_per_second": 80.084, |
|
"step": 72713 |
|
}, |
|
{ |
|
"epoch": 89.05, |
|
"learning_rate": 1.8160047698121518e-06, |
|
"loss": 0.0104, |
|
"step": 72750 |
|
}, |
|
{ |
|
"epoch": 89.35, |
|
"learning_rate": 1.7178022403391475e-06, |
|
"loss": 0.0108, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 89.66, |
|
"learning_rate": 1.6218562848936775e-06, |
|
"loss": 0.0174, |
|
"step": 73250 |
|
}, |
|
{ |
|
"epoch": 89.96, |
|
"learning_rate": 1.528577535241521e-06, |
|
"loss": 0.0072, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4893894643187819, |
|
"eval_loss": 3.0227720737457275, |
|
"eval_precision": 0.5889153512253815, |
|
"eval_recall": 0.4449414911853336, |
|
"eval_runtime": 0.5765, |
|
"eval_samples_per_second": 629.693, |
|
"eval_steps_per_second": 79.796, |
|
"step": 73530 |
|
}, |
|
{ |
|
"epoch": 90.27, |
|
"learning_rate": 1.4383336890610749e-06, |
|
"loss": 0.0151, |
|
"step": 73750 |
|
}, |
|
{ |
|
"epoch": 90.58, |
|
"learning_rate": 1.3504101989962132e-06, |
|
"loss": 0.0109, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 90.88, |
|
"learning_rate": 1.2651848839316887e-06, |
|
"loss": 0.0107, |
|
"step": 74250 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.49676183815019276, |
|
"eval_loss": 2.977621078491211, |
|
"eval_precision": 0.5930682617874735, |
|
"eval_recall": 0.45364507208719035, |
|
"eval_runtime": 0.5751, |
|
"eval_samples_per_second": 631.186, |
|
"eval_steps_per_second": 79.985, |
|
"step": 74347 |
|
}, |
|
{ |
|
"epoch": 91.19, |
|
"learning_rate": 1.182667467199558e-06, |
|
"loss": 0.0081, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 91.49, |
|
"learning_rate": 1.1028673631885173e-06, |
|
"loss": 0.0136, |
|
"step": 74750 |
|
}, |
|
{ |
|
"epoch": 91.8, |
|
"learning_rate": 1.0257936762698288e-06, |
|
"loss": 0.0105, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.4999633782778992, |
|
"eval_loss": 3.0019350051879883, |
|
"eval_precision": 0.5966416670279332, |
|
"eval_recall": 0.4551455728179866, |
|
"eval_runtime": 0.5756, |
|
"eval_samples_per_second": 630.598, |
|
"eval_steps_per_second": 79.91, |
|
"step": 75164 |
|
}, |
|
{ |
|
"epoch": 92.11, |
|
"learning_rate": 9.514551997585913e-07, |
|
"loss": 0.0061, |
|
"step": 75250 |
|
}, |
|
{ |
|
"epoch": 92.41, |
|
"learning_rate": 8.798604149105355e-07, |
|
"loss": 0.0094, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 92.72, |
|
"learning_rate": 8.110174899543743e-07, |
|
"loss": 0.0138, |
|
"step": 75750 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.6887052341597796, |
|
"eval_f1": 0.4999633782778992, |
|
"eval_loss": 3.0192649364471436, |
|
"eval_precision": 0.5966416670279332, |
|
"eval_recall": 0.4551455728179866, |
|
"eval_runtime": 0.5738, |
|
"eval_samples_per_second": 632.575, |
|
"eval_steps_per_second": 80.161, |
|
"step": 75981 |
|
}, |
|
{ |
|
"epoch": 93.02, |
|
"learning_rate": 7.449342791599201e-07, |
|
"loss": 0.0106, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"learning_rate": 6.81618321941968e-07, |
|
"loss": 0.0107, |
|
"step": 76250 |
|
}, |
|
{ |
|
"epoch": 93.64, |
|
"learning_rate": 6.210768420001373e-07, |
|
"loss": 0.0081, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 93.94, |
|
"learning_rate": 5.633167464947242e-07, |
|
"loss": 0.0123, |
|
"step": 76750 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.48841087906946806, |
|
"eval_loss": 3.0272629261016846, |
|
"eval_precision": 0.5842731342731343, |
|
"eval_recall": 0.4449414911853336, |
|
"eval_runtime": 0.5754, |
|
"eval_samples_per_second": 630.882, |
|
"eval_steps_per_second": 79.946, |
|
"step": 76798 |
|
}, |
|
{ |
|
"epoch": 94.25, |
|
"learning_rate": 5.08344625258661e-07, |
|
"loss": 0.0113, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 94.55, |
|
"learning_rate": 4.561667500456862e-07, |
|
"loss": 0.0064, |
|
"step": 77250 |
|
}, |
|
{ |
|
"epoch": 94.86, |
|
"learning_rate": 4.067890738147978e-07, |
|
"loss": 0.0114, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.49834117213459417, |
|
"eval_loss": 3.0193073749542236, |
|
"eval_precision": 0.592981803003125, |
|
"eval_recall": 0.45445875963117344, |
|
"eval_runtime": 0.5755, |
|
"eval_samples_per_second": 630.739, |
|
"eval_steps_per_second": 79.928, |
|
"step": 77615 |
|
}, |
|
{ |
|
"epoch": 95.17, |
|
"learning_rate": 3.6021723005109365e-07, |
|
"loss": 0.013, |
|
"step": 77750 |
|
}, |
|
{ |
|
"epoch": 95.47, |
|
"learning_rate": 3.1645653212303004e-07, |
|
"loss": 0.0127, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 95.78, |
|
"learning_rate": 2.755119726762373e-07, |
|
"loss": 0.0087, |
|
"step": 78250 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4971705409799476, |
|
"eval_loss": 3.0252628326416016, |
|
"eval_precision": 0.5904316041796341, |
|
"eval_recall": 0.45445875963117344, |
|
"eval_runtime": 0.5748, |
|
"eval_samples_per_second": 631.48, |
|
"eval_steps_per_second": 80.022, |
|
"step": 78432 |
|
}, |
|
{ |
|
"epoch": 96.08, |
|
"learning_rate": 2.3738822306390575e-07, |
|
"loss": 0.0076, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 96.39, |
|
"learning_rate": 2.0208963281382054e-07, |
|
"loss": 0.0059, |
|
"step": 78750 |
|
}, |
|
{ |
|
"epoch": 96.7, |
|
"learning_rate": 1.6962022913215026e-07, |
|
"loss": 0.0112, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4971705409799476, |
|
"eval_loss": 3.0260462760925293, |
|
"eval_precision": 0.5904316041796341, |
|
"eval_recall": 0.45445875963117344, |
|
"eval_runtime": 0.575, |
|
"eval_samples_per_second": 631.313, |
|
"eval_steps_per_second": 80.001, |
|
"step": 79249 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 1.399837164439588e-07, |
|
"loss": 0.0198, |
|
"step": 79250 |
|
}, |
|
{ |
|
"epoch": 97.31, |
|
"learning_rate": 1.131834759705852e-07, |
|
"loss": 0.0108, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 97.61, |
|
"learning_rate": 8.922256534386886e-08, |
|
"loss": 0.0096, |
|
"step": 79750 |
|
}, |
|
{ |
|
"epoch": 97.92, |
|
"learning_rate": 6.818252895382516e-08, |
|
"loss": 0.0154, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4971705409799476, |
|
"eval_loss": 3.032817840576172, |
|
"eval_precision": 0.5904316041796341, |
|
"eval_recall": 0.45445875963117344, |
|
"eval_runtime": 0.5745, |
|
"eval_samples_per_second": 631.834, |
|
"eval_steps_per_second": 80.067, |
|
"step": 80066 |
|
}, |
|
{ |
|
"epoch": 98.23, |
|
"learning_rate": 4.989677258919745e-08, |
|
"loss": 0.0066, |
|
"step": 80250 |
|
}, |
|
{ |
|
"epoch": 98.53, |
|
"learning_rate": 3.4457566433288015e-08, |
|
"loss": 0.0089, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 98.84, |
|
"learning_rate": 2.1866671940751205e-08, |
|
"loss": 0.0113, |
|
"step": 80750 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4971705409799476, |
|
"eval_loss": 3.0352275371551514, |
|
"eval_precision": 0.5904316041796341, |
|
"eval_recall": 0.45445875963117344, |
|
"eval_runtime": 0.5755, |
|
"eval_samples_per_second": 630.795, |
|
"eval_steps_per_second": 79.935, |
|
"step": 80883 |
|
}, |
|
{ |
|
"epoch": 99.14, |
|
"learning_rate": 1.212552560317659e-08, |
|
"loss": 0.0052, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 99.45, |
|
"learning_rate": 5.235238785186725e-09, |
|
"loss": 0.0124, |
|
"step": 81250 |
|
}, |
|
{ |
|
"epoch": 99.76, |
|
"learning_rate": 1.1965975976552247e-09, |
|
"loss": 0.0094, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.6859504132231405, |
|
"eval_f1": 0.4971705409799476, |
|
"eval_loss": 3.034996271133423, |
|
"eval_precision": 0.5904316041796341, |
|
"eval_recall": 0.45445875963117344, |
|
"eval_runtime": 0.5749, |
|
"eval_samples_per_second": 631.447, |
|
"eval_steps_per_second": 80.018, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 81700, |
|
"total_flos": 2.247320628668568e+16, |
|
"train_loss": 0.19591666076364248, |
|
"train_runtime": 5839.9501, |
|
"train_samples_per_second": 111.885, |
|
"train_steps_per_second": 13.99 |
|
} |
|
], |
|
"max_steps": 81700, |
|
"num_train_epochs": 100, |
|
"total_flos": 2.247320628668568e+16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 5e-06 |
|
} |
|
} |
|
|