{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 2188, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.997714808043876e-05, "loss": 5.5906, "step": 1 }, { "epoch": 0.02, "learning_rate": 4.9017367458866546e-05, "loss": 3.8301, "step": 43 }, { "epoch": 0.02, "eval_accuracy": 0.7008, "eval_f1": 0.7372365339578454, "eval_loss": 1.7894935607910156, "eval_precision": 0.6572025052192066, "eval_recall": 0.8394666666666667, "eval_runtime": 100.3646, "eval_samples_per_second": 37.364, "eval_steps_per_second": 4.673, "step": 43 }, { "epoch": 0.04, "learning_rate": 4.8034734917733096e-05, "loss": 1.7461, "step": 86 }, { "epoch": 0.04, "eval_accuracy": 0.7669333333333334, "eval_f1": 0.8065515714918104, "eval_loss": 1.6799604892730713, "eval_precision": 0.6893681422625804, "eval_recall": 0.9717333333333333, "eval_runtime": 100.5302, "eval_samples_per_second": 37.302, "eval_steps_per_second": 4.665, "step": 86 }, { "epoch": 0.06, "learning_rate": 4.705210237659964e-05, "loss": 1.5511, "step": 129 }, { "epoch": 0.06, "eval_accuracy": 0.8925333333333333, "eval_f1": 0.8882728028832826, "eval_loss": 0.5680333375930786, "eval_precision": 0.9249422632794457, "eval_recall": 0.8544, "eval_runtime": 100.4817, "eval_samples_per_second": 37.32, "eval_steps_per_second": 4.668, "step": 129 }, { "epoch": 0.08, "learning_rate": 4.606946983546618e-05, "loss": 1.2304, "step": 172 }, { "epoch": 0.08, "eval_accuracy": 0.7061333333333333, "eval_f1": 0.5838368580060423, "eval_loss": 4.393102169036865, "eval_precision": 1.0, "eval_recall": 0.41226666666666667, "eval_runtime": 100.2927, "eval_samples_per_second": 37.391, "eval_steps_per_second": 4.676, "step": 172 }, { "epoch": 0.1, "learning_rate": 4.5086837294332726e-05, "loss": 1.6111, "step": 215 }, { "epoch": 0.1, "eval_accuracy": 0.7248, "eval_f1": 0.7810776410691558, "eval_loss": 1.275352954864502, "eval_precision": 0.6484677703416696, "eval_recall": 0.9818666666666667, "eval_runtime": 100.7588, "eval_samples_per_second": 37.218, "eval_steps_per_second": 4.655, "step": 215 }, { "epoch": 0.12, "learning_rate": 4.410420475319927e-05, "loss": 1.1002, "step": 258 }, { "epoch": 0.12, "eval_accuracy": 0.848, "eval_f1": 0.8375142531356898, "eval_loss": 0.5289755463600159, "eval_precision": 0.8995713410900184, "eval_recall": 0.7834666666666666, "eval_runtime": 100.7268, "eval_samples_per_second": 37.229, "eval_steps_per_second": 4.656, "step": 258 }, { "epoch": 0.14, "learning_rate": 4.312157221206581e-05, "loss": 0.6462, "step": 301 }, { "epoch": 0.14, "eval_accuracy": 0.864, "eval_f1": 0.8452669902912622, "eval_loss": 0.6743063926696777, "eval_precision": 0.9802955665024631, "eval_recall": 0.7429333333333333, "eval_runtime": 100.4683, "eval_samples_per_second": 37.325, "eval_steps_per_second": 4.668, "step": 301 }, { "epoch": 0.16, "learning_rate": 4.213893967093236e-05, "loss": 0.6835, "step": 344 }, { "epoch": 0.16, "eval_accuracy": 0.6992, "eval_f1": 0.7657807308970099, "eval_loss": 0.9862114787101746, "eval_precision": 0.6269976198571914, "eval_recall": 0.9834666666666667, "eval_runtime": 100.9462, "eval_samples_per_second": 37.148, "eval_steps_per_second": 4.646, "step": 344 }, { "epoch": 0.18, "learning_rate": 4.1156307129798905e-05, "loss": 0.5506, "step": 387 }, { "epoch": 0.18, "eval_accuracy": 0.7984, "eval_f1": 0.7476635514018691, "eval_loss": 1.4477732181549072, "eval_precision": 0.9991079393398751, "eval_recall": 0.5973333333333334, "eval_runtime": 101.0116, "eval_samples_per_second": 37.124, "eval_steps_per_second": 4.643, "step": 387 }, { "epoch": 0.2, "learning_rate": 4.017367458866545e-05, "loss": 1.0906, "step": 430 }, { "epoch": 0.2, "eval_accuracy": 0.9165333333333333, "eval_f1": 0.9144575020497402, "eval_loss": 0.2948032021522522, "eval_precision": 0.9377802690582959, "eval_recall": 0.8922666666666667, "eval_runtime": 100.9951, "eval_samples_per_second": 37.131, "eval_steps_per_second": 4.644, "step": 430 }, { "epoch": 0.22, "learning_rate": 3.919104204753199e-05, "loss": 0.4459, "step": 473 }, { "epoch": 0.22, "eval_accuracy": 0.9082666666666667, "eval_f1": 0.9078242229367631, "eval_loss": 0.5833027362823486, "eval_precision": 0.9122240172320948, "eval_recall": 0.9034666666666666, "eval_runtime": 100.5094, "eval_samples_per_second": 37.31, "eval_steps_per_second": 4.666, "step": 473 }, { "epoch": 0.24, "learning_rate": 3.820840950639854e-05, "loss": 0.4996, "step": 516 }, { "epoch": 0.24, "eval_accuracy": 0.9210666666666667, "eval_f1": 0.9218172213417856, "eval_loss": 0.6353188157081604, "eval_precision": 0.913134484563056, "eval_recall": 0.9306666666666666, "eval_runtime": 100.6316, "eval_samples_per_second": 37.265, "eval_steps_per_second": 4.661, "step": 516 }, { "epoch": 0.26, "learning_rate": 3.7225776965265085e-05, "loss": 0.4613, "step": 559 }, { "epoch": 0.26, "eval_accuracy": 0.9290666666666667, "eval_f1": 0.926923076923077, "eval_loss": 0.43136465549468994, "eval_precision": 0.9558073654390935, "eval_recall": 0.8997333333333334, "eval_runtime": 100.9532, "eval_samples_per_second": 37.146, "eval_steps_per_second": 4.646, "step": 559 }, { "epoch": 0.28, "learning_rate": 3.624314442413163e-05, "loss": 0.3868, "step": 602 }, { "epoch": 0.28, "eval_accuracy": 0.8997333333333334, "eval_f1": 0.8932424758659853, "eval_loss": 0.6684072017669678, "eval_precision": 0.9550698239222829, "eval_recall": 0.8389333333333333, "eval_runtime": 100.7051, "eval_samples_per_second": 37.237, "eval_steps_per_second": 4.657, "step": 602 }, { "epoch": 0.29, "learning_rate": 3.526051188299818e-05, "loss": 0.3524, "step": 645 }, { "epoch": 0.29, "eval_accuracy": 0.9269333333333334, "eval_f1": 0.9261057173678533, "eval_loss": 0.49305853247642517, "eval_precision": 0.9367157665030006, "eval_recall": 0.9157333333333333, "eval_runtime": 101.0904, "eval_samples_per_second": 37.096, "eval_steps_per_second": 4.639, "step": 645 }, { "epoch": 0.31, "learning_rate": 3.4277879341864715e-05, "loss": 0.4739, "step": 688 }, { "epoch": 0.31, "eval_accuracy": 0.9192, "eval_f1": 0.9207428720899816, "eval_loss": 0.4122666120529175, "eval_precision": 0.9034907597535934, "eval_recall": 0.9386666666666666, "eval_runtime": 101.3837, "eval_samples_per_second": 36.988, "eval_steps_per_second": 4.626, "step": 688 }, { "epoch": 0.33, "learning_rate": 3.329524680073126e-05, "loss": 0.5484, "step": 731 }, { "epoch": 0.33, "eval_accuracy": 0.6853333333333333, "eval_f1": 0.7321833862914208, "eval_loss": 0.6493818759918213, "eval_precision": 0.6372975108652706, "eval_recall": 0.8602666666666666, "eval_runtime": 100.5978, "eval_samples_per_second": 37.277, "eval_steps_per_second": 4.662, "step": 731 }, { "epoch": 0.35, "learning_rate": 3.231261425959781e-05, "loss": 0.4939, "step": 774 }, { "epoch": 0.35, "eval_accuracy": 0.8562666666666666, "eval_f1": 0.8371109096403747, "eval_loss": 0.43780916929244995, "eval_precision": 0.9658298465829847, "eval_recall": 0.7386666666666667, "eval_runtime": 101.7646, "eval_samples_per_second": 36.85, "eval_steps_per_second": 4.609, "step": 774 }, { "epoch": 0.37, "learning_rate": 3.132998171846435e-05, "loss": 0.3646, "step": 817 }, { "epoch": 0.37, "eval_accuracy": 0.9093333333333333, "eval_f1": 0.9018475750577367, "eval_loss": 0.5472243428230286, "eval_precision": 0.9830081812460667, "eval_recall": 0.8330666666666666, "eval_runtime": 100.6656, "eval_samples_per_second": 37.252, "eval_steps_per_second": 4.659, "step": 817 }, { "epoch": 0.39, "learning_rate": 3.03473491773309e-05, "loss": 0.3954, "step": 860 }, { "epoch": 0.39, "eval_accuracy": 0.9216, "eval_f1": 0.9219330855018588, "eval_loss": 0.3911387324333191, "eval_precision": 0.9180327868852459, "eval_recall": 0.9258666666666666, "eval_runtime": 101.1862, "eval_samples_per_second": 37.06, "eval_steps_per_second": 4.635, "step": 860 }, { "epoch": 0.41, "learning_rate": 2.9364716636197442e-05, "loss": 0.3898, "step": 903 }, { "epoch": 0.41, "eval_accuracy": 0.8677333333333334, "eval_f1": 0.8485958485958486, "eval_loss": 0.608423113822937, "eval_precision": 0.9921484653818701, "eval_recall": 0.7413333333333333, "eval_runtime": 100.7786, "eval_samples_per_second": 37.21, "eval_steps_per_second": 4.654, "step": 903 }, { "epoch": 0.43, "learning_rate": 2.838208409506399e-05, "loss": 0.5361, "step": 946 }, { "epoch": 0.43, "eval_accuracy": 0.9045333333333333, "eval_f1": 0.8954439252336449, "eval_loss": 0.5845316648483276, "eval_precision": 0.9896707553260168, "eval_recall": 0.8176, "eval_runtime": 100.5268, "eval_samples_per_second": 37.303, "eval_steps_per_second": 4.665, "step": 946 }, { "epoch": 0.45, "learning_rate": 2.739945155393053e-05, "loss": 0.3488, "step": 989 }, { "epoch": 0.45, "eval_accuracy": 0.9032, "eval_f1": 0.90854119425548, "eval_loss": 0.6275143027305603, "eval_precision": 0.8610315186246418, "eval_recall": 0.9616, "eval_runtime": 100.5257, "eval_samples_per_second": 37.304, "eval_steps_per_second": 4.665, "step": 989 }, { "epoch": 0.47, "learning_rate": 2.641681901279708e-05, "loss": 0.6024, "step": 1032 }, { "epoch": 0.47, "eval_accuracy": 0.7704, "eval_f1": 0.8113910186199343, "eval_loss": 1.486302375793457, "eval_precision": 0.6884758364312268, "eval_recall": 0.9877333333333334, "eval_runtime": 100.9298, "eval_samples_per_second": 37.155, "eval_steps_per_second": 4.647, "step": 1032 }, { "epoch": 0.49, "learning_rate": 2.5434186471663625e-05, "loss": 0.6391, "step": 1075 }, { "epoch": 0.49, "eval_accuracy": 0.9296, "eval_f1": 0.9263803680981595, "eval_loss": 0.47947388887405396, "eval_precision": 0.9707773232028054, "eval_recall": 0.8858666666666667, "eval_runtime": 100.5142, "eval_samples_per_second": 37.308, "eval_steps_per_second": 4.666, "step": 1075 }, { "epoch": 0.51, "learning_rate": 2.4451553930530165e-05, "loss": 0.6603, "step": 1118 }, { "epoch": 0.51, "eval_accuracy": 0.9266666666666666, "eval_f1": 0.9240541286937309, "eval_loss": 0.35503658652305603, "eval_precision": 0.9581901489117984, "eval_recall": 0.8922666666666667, "eval_runtime": 100.9348, "eval_samples_per_second": 37.153, "eval_steps_per_second": 4.647, "step": 1118 }, { "epoch": 0.53, "learning_rate": 2.346892138939671e-05, "loss": 0.3186, "step": 1161 }, { "epoch": 0.53, "eval_accuracy": 0.9317333333333333, "eval_f1": 0.9288493607559756, "eval_loss": 0.4835192561149597, "eval_precision": 0.9698200812536274, "eval_recall": 0.8912, "eval_runtime": 100.4468, "eval_samples_per_second": 37.333, "eval_steps_per_second": 4.669, "step": 1161 }, { "epoch": 0.55, "learning_rate": 2.2486288848263255e-05, "loss": 0.3122, "step": 1204 }, { "epoch": 0.55, "eval_accuracy": 0.9328, "eval_f1": 0.9340314136125655, "eval_loss": 0.31904712319374084, "eval_precision": 0.9172236503856042, "eval_recall": 0.9514666666666667, "eval_runtime": 100.4601, "eval_samples_per_second": 37.328, "eval_steps_per_second": 4.669, "step": 1204 }, { "epoch": 0.57, "learning_rate": 2.1503656307129798e-05, "loss": 0.4917, "step": 1247 }, { "epoch": 0.57, "eval_accuracy": 0.9192, "eval_f1": 0.9151498179781573, "eval_loss": 0.45495307445526123, "eval_precision": 0.9634433962264151, "eval_recall": 0.8714666666666666, "eval_runtime": 100.5501, "eval_samples_per_second": 37.295, "eval_steps_per_second": 4.664, "step": 1247 }, { "epoch": 0.59, "learning_rate": 2.0521023765996345e-05, "loss": 0.448, "step": 1290 }, { "epoch": 0.59, "eval_accuracy": 0.936, "eval_f1": 0.9338113623827909, "eval_loss": 0.18482676148414612, "eval_precision": 0.9668760708166761, "eval_recall": 0.9029333333333334, "eval_runtime": 100.3779, "eval_samples_per_second": 37.359, "eval_steps_per_second": 4.672, "step": 1290 }, { "epoch": 0.61, "learning_rate": 1.953839122486289e-05, "loss": 0.3064, "step": 1333 }, { "epoch": 0.61, "eval_accuracy": 0.9402666666666667, "eval_f1": 0.9405204460966542, "eval_loss": 0.23262375593185425, "eval_precision": 0.9365415124272871, "eval_recall": 0.9445333333333333, "eval_runtime": 101.1761, "eval_samples_per_second": 37.064, "eval_steps_per_second": 4.635, "step": 1333 }, { "epoch": 0.63, "learning_rate": 1.8555758683729435e-05, "loss": 0.2274, "step": 1376 }, { "epoch": 0.63, "eval_accuracy": 0.924, "eval_f1": 0.9256844850065189, "eval_loss": 0.28850072622299194, "eval_precision": 0.9056122448979592, "eval_recall": 0.9466666666666667, "eval_runtime": 100.8319, "eval_samples_per_second": 37.191, "eval_steps_per_second": 4.651, "step": 1376 }, { "epoch": 0.65, "learning_rate": 1.7573126142595978e-05, "loss": 0.3728, "step": 1419 }, { "epoch": 0.65, "eval_accuracy": 0.9301333333333334, "eval_f1": 0.9273433166943982, "eval_loss": 0.29230690002441406, "eval_precision": 0.9659156556903524, "eval_recall": 0.8917333333333334, "eval_runtime": 100.785, "eval_samples_per_second": 37.208, "eval_steps_per_second": 4.653, "step": 1419 }, { "epoch": 0.67, "learning_rate": 1.659049360146252e-05, "loss": 0.5048, "step": 1462 }, { "epoch": 0.67, "eval_accuracy": 0.9464, "eval_f1": 0.9464713715046603, "eval_loss": 0.2164454311132431, "eval_precision": 0.9452127659574469, "eval_recall": 0.9477333333333333, "eval_runtime": 100.7467, "eval_samples_per_second": 37.222, "eval_steps_per_second": 4.655, "step": 1462 }, { "epoch": 0.69, "learning_rate": 1.5607861060329068e-05, "loss": 0.4483, "step": 1505 }, { "epoch": 0.69, "eval_accuracy": 0.9250666666666667, "eval_f1": 0.9274464239607538, "eval_loss": 0.3050013482570648, "eval_precision": 0.8988988988988988, "eval_recall": 0.9578666666666666, "eval_runtime": 100.8928, "eval_samples_per_second": 37.168, "eval_steps_per_second": 4.648, "step": 1505 }, { "epoch": 0.71, "learning_rate": 1.4625228519195613e-05, "loss": 0.3347, "step": 1548 }, { "epoch": 0.71, "eval_accuracy": 0.9466666666666667, "eval_f1": 0.9471179270227393, "eval_loss": 0.22358763217926025, "eval_precision": 0.9391714735186156, "eval_recall": 0.9552, "eval_runtime": 100.8242, "eval_samples_per_second": 37.193, "eval_steps_per_second": 4.652, "step": 1548 }, { "epoch": 0.73, "learning_rate": 1.3642595978062158e-05, "loss": 0.31, "step": 1591 }, { "epoch": 0.73, "eval_accuracy": 0.9386666666666666, "eval_f1": 0.9386011745862254, "eval_loss": 0.18921497464179993, "eval_precision": 0.9396044895777659, "eval_recall": 0.9376, "eval_runtime": 100.5904, "eval_samples_per_second": 37.28, "eval_steps_per_second": 4.662, "step": 1591 }, { "epoch": 0.75, "learning_rate": 1.2659963436928701e-05, "loss": 0.1936, "step": 1634 }, { "epoch": 0.75, "eval_accuracy": 0.9424, "eval_f1": 0.9430079155672824, "eval_loss": 0.24673737585544586, "eval_precision": 0.933159268929504, "eval_recall": 0.9530666666666666, "eval_runtime": 100.7748, "eval_samples_per_second": 37.212, "eval_steps_per_second": 4.654, "step": 1634 }, { "epoch": 0.77, "learning_rate": 1.1677330895795248e-05, "loss": 0.2855, "step": 1677 }, { "epoch": 0.77, "eval_accuracy": 0.9384, "eval_f1": 0.9391304347826087, "eval_loss": 0.37995821237564087, "eval_precision": 0.928125, "eval_recall": 0.9504, "eval_runtime": 100.6345, "eval_samples_per_second": 37.264, "eval_steps_per_second": 4.66, "step": 1677 }, { "epoch": 0.79, "learning_rate": 1.0694698354661791e-05, "loss": 0.2566, "step": 1720 }, { "epoch": 0.79, "eval_accuracy": 0.9370666666666667, "eval_f1": 0.9371671991480297, "eval_loss": 0.38548287749290466, "eval_precision": 0.935672514619883, "eval_recall": 0.9386666666666666, "eval_runtime": 101.0016, "eval_samples_per_second": 37.128, "eval_steps_per_second": 4.643, "step": 1720 }, { "epoch": 0.81, "learning_rate": 9.712065813528338e-06, "loss": 0.2966, "step": 1763 }, { "epoch": 0.81, "eval_accuracy": 0.9512, "eval_f1": 0.950068212824011, "eval_loss": 0.28995445370674133, "eval_precision": 0.9726256983240223, "eval_recall": 0.9285333333333333, "eval_runtime": 100.6179, "eval_samples_per_second": 37.27, "eval_steps_per_second": 4.661, "step": 1763 }, { "epoch": 0.83, "learning_rate": 8.729433272394881e-06, "loss": 0.2485, "step": 1806 }, { "epoch": 0.83, "eval_accuracy": 0.9549333333333333, "eval_f1": 0.9545087483176311, "eval_loss": 0.24945972859859467, "eval_precision": 0.9635869565217391, "eval_recall": 0.9456, "eval_runtime": 100.5538, "eval_samples_per_second": 37.293, "eval_steps_per_second": 4.664, "step": 1806 }, { "epoch": 0.85, "learning_rate": 7.746800731261426e-06, "loss": 0.2553, "step": 1849 }, { "epoch": 0.85, "eval_accuracy": 0.9357333333333333, "eval_f1": 0.9367620047231697, "eval_loss": 0.394607812166214, "eval_precision": 0.922004132231405, "eval_recall": 0.952, "eval_runtime": 100.5784, "eval_samples_per_second": 37.284, "eval_steps_per_second": 4.663, "step": 1849 }, { "epoch": 0.86, "learning_rate": 6.764168190127972e-06, "loss": 0.2719, "step": 1892 }, { "epoch": 0.86, "eval_accuracy": 0.9533333333333334, "eval_f1": 0.9524068534131085, "eval_loss": 0.361551433801651, "eval_precision": 0.9716981132075472, "eval_recall": 0.9338666666666666, "eval_runtime": 100.5791, "eval_samples_per_second": 37.284, "eval_steps_per_second": 4.663, "step": 1892 }, { "epoch": 0.88, "learning_rate": 5.781535648994515e-06, "loss": 0.2925, "step": 1935 }, { "epoch": 0.88, "eval_accuracy": 0.9541333333333334, "eval_f1": 0.9532862574687669, "eval_loss": 0.30848976969718933, "eval_precision": 0.9712230215827338, "eval_recall": 0.936, "eval_runtime": 100.6234, "eval_samples_per_second": 37.268, "eval_steps_per_second": 4.661, "step": 1935 }, { "epoch": 0.9, "learning_rate": 4.798903107861061e-06, "loss": 0.2477, "step": 1978 }, { "epoch": 0.9, "eval_accuracy": 0.9544, "eval_f1": 0.9536710918450284, "eval_loss": 0.28153562545776367, "eval_precision": 0.9691629955947136, "eval_recall": 0.9386666666666666, "eval_runtime": 100.4809, "eval_samples_per_second": 37.321, "eval_steps_per_second": 4.668, "step": 1978 }, { "epoch": 0.92, "learning_rate": 3.816270566727605e-06, "loss": 0.2164, "step": 2021 }, { "epoch": 0.92, "eval_accuracy": 0.9568, "eval_f1": 0.9560499186109603, "eval_loss": 0.24747176468372345, "eval_precision": 0.9729431253451132, "eval_recall": 0.9397333333333333, "eval_runtime": 100.5076, "eval_samples_per_second": 37.311, "eval_steps_per_second": 4.666, "step": 2021 }, { "epoch": 0.94, "learning_rate": 2.83363802559415e-06, "loss": 0.2816, "step": 2064 }, { "epoch": 0.94, "eval_accuracy": 0.9570666666666666, "eval_f1": 0.9563093622795115, "eval_loss": 0.263465017080307, "eval_precision": 0.9734806629834254, "eval_recall": 0.9397333333333333, "eval_runtime": 100.4934, "eval_samples_per_second": 37.316, "eval_steps_per_second": 4.667, "step": 2064 }, { "epoch": 0.96, "learning_rate": 1.851005484460695e-06, "loss": 0.351, "step": 2107 }, { "epoch": 0.96, "eval_accuracy": 0.9573333333333334, "eval_f1": 0.9566160520607375, "eval_loss": 0.23055072128772736, "eval_precision": 0.972972972972973, "eval_recall": 0.9408, "eval_runtime": 100.5497, "eval_samples_per_second": 37.295, "eval_steps_per_second": 4.664, "step": 2107 }, { "epoch": 0.98, "learning_rate": 8.683729433272396e-07, "loss": 0.2591, "step": 2150 }, { "epoch": 0.98, "eval_accuracy": 0.9562666666666667, "eval_f1": 0.955288985823337, "eval_loss": 0.2391371726989746, "eval_precision": 0.9771332961517011, "eval_recall": 0.9344, "eval_runtime": 100.6094, "eval_samples_per_second": 37.273, "eval_steps_per_second": 4.662, "step": 2150 } ], "max_steps": 2188, "num_train_epochs": 1, "total_flos": 1.62524823552e+16, "trial_name": null, "trial_params": null }