|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 1750, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9971428571428576e-05, |
|
"loss": 2.5589, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9e-05, |
|
"loss": 4.0188, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.5033333333333333, |
|
"eval_f1": 0.015852047556142668, |
|
"eval_loss": 1.71467924118042, |
|
"eval_precision": 0.8571428571428571, |
|
"eval_recall": 0.008, |
|
"eval_runtime": 245.0349, |
|
"eval_samples_per_second": 12.243, |
|
"eval_steps_per_second": 1.53, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.1269, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.5356666666666666, |
|
"eval_f1": 0.15932407966203982, |
|
"eval_loss": 0.8176602125167847, |
|
"eval_precision": 0.8407643312101911, |
|
"eval_recall": 0.088, |
|
"eval_runtime": 245.0774, |
|
"eval_samples_per_second": 12.241, |
|
"eval_steps_per_second": 1.53, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.9901, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.5946666666666667, |
|
"eval_f1": 0.36401673640167365, |
|
"eval_loss": 1.4929765462875366, |
|
"eval_precision": 0.8446601941747572, |
|
"eval_recall": 0.232, |
|
"eval_runtime": 244.7538, |
|
"eval_samples_per_second": 12.257, |
|
"eval_steps_per_second": 1.532, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.2113, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.619, |
|
"eval_f1": 0.7197842608482471, |
|
"eval_loss": 0.744438648223877, |
|
"eval_precision": 0.5692128732066692, |
|
"eval_recall": 0.9786666666666667, |
|
"eval_runtime": 244.6225, |
|
"eval_samples_per_second": 12.264, |
|
"eval_steps_per_second": 1.533, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.7836, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.689, |
|
"eval_f1": 0.5959289735816371, |
|
"eval_loss": 0.6166332364082336, |
|
"eval_precision": 0.8504326328800988, |
|
"eval_recall": 0.45866666666666667, |
|
"eval_runtime": 244.7744, |
|
"eval_samples_per_second": 12.256, |
|
"eval_steps_per_second": 1.532, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.7519, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.775, |
|
"eval_f1": 0.7330960854092525, |
|
"eval_loss": 0.6699690818786621, |
|
"eval_precision": 0.9008746355685131, |
|
"eval_recall": 0.618, |
|
"eval_runtime": 245.4062, |
|
"eval_samples_per_second": 12.225, |
|
"eval_steps_per_second": 1.528, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3e-05, |
|
"loss": 1.0327, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.7083333333333334, |
|
"eval_f1": 0.5988078862906923, |
|
"eval_loss": 0.5815353393554688, |
|
"eval_precision": 0.9588839941262849, |
|
"eval_recall": 0.43533333333333335, |
|
"eval_runtime": 245.1736, |
|
"eval_samples_per_second": 12.236, |
|
"eval_steps_per_second": 1.53, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.7039, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.7043333333333334, |
|
"eval_f1": 0.5891616489115331, |
|
"eval_loss": 1.3649965524673462, |
|
"eval_precision": 0.9650986342943855, |
|
"eval_recall": 0.424, |
|
"eval_runtime": 247.043, |
|
"eval_samples_per_second": 12.144, |
|
"eval_steps_per_second": 1.518, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1e-05, |
|
"loss": 1.26, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.7916666666666666, |
|
"eval_f1": 0.7464503042596349, |
|
"eval_loss": 0.6255013942718506, |
|
"eval_precision": 0.9533678756476683, |
|
"eval_recall": 0.6133333333333333, |
|
"eval_runtime": 245.1858, |
|
"eval_samples_per_second": 12.236, |
|
"eval_steps_per_second": 1.529, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5196, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.855, |
|
"eval_f1": 0.8476357267950964, |
|
"eval_loss": 0.4524073004722595, |
|
"eval_precision": 0.8929889298892989, |
|
"eval_recall": 0.8066666666666666, |
|
"eval_runtime": 244.8477, |
|
"eval_samples_per_second": 12.253, |
|
"eval_steps_per_second": 1.532, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.7179, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.795, |
|
"eval_f1": 0.76409666283084, |
|
"eval_loss": 0.6408818960189819, |
|
"eval_precision": 0.8997289972899729, |
|
"eval_recall": 0.664, |
|
"eval_runtime": 244.8569, |
|
"eval_samples_per_second": 12.252, |
|
"eval_steps_per_second": 1.532, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.7823, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.821, |
|
"eval_f1": 0.7869892899643, |
|
"eval_loss": 0.4732283651828766, |
|
"eval_precision": 0.9715964740450539, |
|
"eval_recall": 0.6613333333333333, |
|
"eval_runtime": 244.9263, |
|
"eval_samples_per_second": 12.249, |
|
"eval_steps_per_second": 1.531, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7e-05, |
|
"loss": 0.2816, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.89, |
|
"eval_f1": 0.8825622775800712, |
|
"eval_loss": 0.5548559427261353, |
|
"eval_precision": 0.9465648854961832, |
|
"eval_recall": 0.8266666666666667, |
|
"eval_runtime": 245.0027, |
|
"eval_samples_per_second": 12.245, |
|
"eval_steps_per_second": 1.531, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.3043, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.8976666666666666, |
|
"eval_f1": 0.8971524288107202, |
|
"eval_loss": 0.48737743496894836, |
|
"eval_precision": 0.9016835016835016, |
|
"eval_recall": 0.8926666666666667, |
|
"eval_runtime": 245.4898, |
|
"eval_samples_per_second": 12.22, |
|
"eval_steps_per_second": 1.528, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.8005, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.8443333333333334, |
|
"eval_f1": 0.8613831997625407, |
|
"eval_loss": 0.899199366569519, |
|
"eval_precision": 0.7763509898341359, |
|
"eval_recall": 0.9673333333333334, |
|
"eval_runtime": 245.655, |
|
"eval_samples_per_second": 12.212, |
|
"eval_steps_per_second": 1.527, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 0.5206, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.881, |
|
"eval_f1": 0.8879824286162535, |
|
"eval_loss": 0.5689557194709778, |
|
"eval_precision": 0.8387670420865442, |
|
"eval_recall": 0.9433333333333334, |
|
"eval_runtime": 245.8808, |
|
"eval_samples_per_second": 12.201, |
|
"eval_steps_per_second": 1.525, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.3e-05, |
|
"loss": 0.2982, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.8383333333333334, |
|
"eval_f1": 0.8556977090151742, |
|
"eval_loss": 1.3064663410186768, |
|
"eval_precision": 0.7727028479312198, |
|
"eval_recall": 0.9586666666666667, |
|
"eval_runtime": 244.8259, |
|
"eval_samples_per_second": 12.254, |
|
"eval_steps_per_second": 1.532, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 1.159, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.8866666666666667, |
|
"eval_f1": 0.8924731182795699, |
|
"eval_loss": 0.6431537866592407, |
|
"eval_precision": 0.8489771359807461, |
|
"eval_recall": 0.9406666666666667, |
|
"eval_runtime": 244.8113, |
|
"eval_samples_per_second": 12.254, |
|
"eval_steps_per_second": 1.532, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1e-05, |
|
"loss": 0.4328, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.8996666666666666, |
|
"eval_f1": 0.9056130448416432, |
|
"eval_loss": 0.4550739824771881, |
|
"eval_precision": 0.8549437537004144, |
|
"eval_recall": 0.9626666666666667, |
|
"eval_runtime": 250.1284, |
|
"eval_samples_per_second": 11.994, |
|
"eval_steps_per_second": 1.499, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4249, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.8913333333333333, |
|
"eval_f1": 0.8823953823953824, |
|
"eval_loss": 0.40748831629753113, |
|
"eval_precision": 0.9614779874213837, |
|
"eval_recall": 0.8153333333333334, |
|
"eval_runtime": 244.9069, |
|
"eval_samples_per_second": 12.25, |
|
"eval_steps_per_second": 1.531, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9e-05, |
|
"loss": 0.435, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.9173333333333333, |
|
"eval_f1": 0.9194805194805196, |
|
"eval_loss": 0.28212666511535645, |
|
"eval_precision": 0.8962025316455696, |
|
"eval_recall": 0.944, |
|
"eval_runtime": 247.2532, |
|
"eval_samples_per_second": 12.133, |
|
"eval_steps_per_second": 1.517, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8000000000000003e-05, |
|
"loss": 0.2909, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.9186666666666666, |
|
"eval_f1": 0.9155124653739612, |
|
"eval_loss": 0.2652963101863861, |
|
"eval_precision": 0.952449567723343, |
|
"eval_recall": 0.8813333333333333, |
|
"eval_runtime": 244.8465, |
|
"eval_samples_per_second": 12.253, |
|
"eval_steps_per_second": 1.532, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.7000000000000002e-05, |
|
"loss": 0.2164, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.908, |
|
"eval_f1": 0.911651728553137, |
|
"eval_loss": 0.41052377223968506, |
|
"eval_precision": 0.8768472906403941, |
|
"eval_recall": 0.9493333333333334, |
|
"eval_runtime": 244.9871, |
|
"eval_samples_per_second": 12.246, |
|
"eval_steps_per_second": 1.531, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.2741, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.9086666666666666, |
|
"eval_f1": 0.9023521026372059, |
|
"eval_loss": 0.35454556345939636, |
|
"eval_precision": 0.9693721286370597, |
|
"eval_recall": 0.844, |
|
"eval_runtime": 245.5305, |
|
"eval_samples_per_second": 12.218, |
|
"eval_steps_per_second": 1.527, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3406, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.9306666666666666, |
|
"eval_f1": 0.9311258278145697, |
|
"eval_loss": 0.2322322428226471, |
|
"eval_precision": 0.925, |
|
"eval_recall": 0.9373333333333334, |
|
"eval_runtime": 244.978, |
|
"eval_samples_per_second": 12.246, |
|
"eval_steps_per_second": 1.531, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.2471, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.9176666666666666, |
|
"eval_f1": 0.9146804835924007, |
|
"eval_loss": 0.3999188542366028, |
|
"eval_precision": 0.9491039426523298, |
|
"eval_recall": 0.8826666666666667, |
|
"eval_runtime": 246.19, |
|
"eval_samples_per_second": 12.186, |
|
"eval_steps_per_second": 1.523, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 0.4955, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.8633333333333333, |
|
"eval_f1": 0.844106463878327, |
|
"eval_loss": 0.5945030450820923, |
|
"eval_precision": 0.9823008849557522, |
|
"eval_recall": 0.74, |
|
"eval_runtime": 245.9264, |
|
"eval_samples_per_second": 12.199, |
|
"eval_steps_per_second": 1.525, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.3085, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.9136666666666666, |
|
"eval_f1": 0.908641975308642, |
|
"eval_loss": 0.39902010560035706, |
|
"eval_precision": 0.9647940074906367, |
|
"eval_recall": 0.8586666666666667, |
|
"eval_runtime": 245.3453, |
|
"eval_samples_per_second": 12.228, |
|
"eval_steps_per_second": 1.528, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.513, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.9236666666666666, |
|
"eval_f1": 0.9236921026324558, |
|
"eval_loss": 0.21340703964233398, |
|
"eval_precision": 0.9233844103930713, |
|
"eval_recall": 0.924, |
|
"eval_runtime": 244.8648, |
|
"eval_samples_per_second": 12.252, |
|
"eval_steps_per_second": 1.531, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2576, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.9283333333333333, |
|
"eval_f1": 0.9271926854046733, |
|
"eval_loss": 0.30752047896385193, |
|
"eval_precision": 0.9421885753613214, |
|
"eval_recall": 0.9126666666666666, |
|
"eval_runtime": 319.7381, |
|
"eval_samples_per_second": 9.383, |
|
"eval_steps_per_second": 1.173, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.3558, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.9276666666666666, |
|
"eval_f1": 0.9297961824652217, |
|
"eval_loss": 0.24874693155288696, |
|
"eval_precision": 0.9032055311125079, |
|
"eval_recall": 0.958, |
|
"eval_runtime": 245.9624, |
|
"eval_samples_per_second": 12.197, |
|
"eval_steps_per_second": 1.525, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.2618, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.9396666666666667, |
|
"eval_f1": 0.9407528641571195, |
|
"eval_loss": 0.3159142732620239, |
|
"eval_precision": 0.9241157556270096, |
|
"eval_recall": 0.958, |
|
"eval_runtime": 245.9065, |
|
"eval_samples_per_second": 12.2, |
|
"eval_steps_per_second": 1.525, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.2992, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.9313333333333333, |
|
"eval_f1": 0.9339320076972418, |
|
"eval_loss": 0.33460330963134766, |
|
"eval_precision": 0.899876390605686, |
|
"eval_recall": 0.9706666666666667, |
|
"eval_runtime": 245.128, |
|
"eval_samples_per_second": 12.239, |
|
"eval_steps_per_second": 1.53, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.2474, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.9346666666666666, |
|
"eval_f1": 0.9332879509870661, |
|
"eval_loss": 0.265165776014328, |
|
"eval_precision": 0.9534075104311543, |
|
"eval_recall": 0.914, |
|
"eval_runtime": 245.8947, |
|
"eval_samples_per_second": 12.2, |
|
"eval_steps_per_second": 1.525, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3008, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.9423333333333334, |
|
"eval_f1": 0.9421211107393778, |
|
"eval_loss": 0.22882609069347382, |
|
"eval_precision": 0.9456010745466756, |
|
"eval_recall": 0.9386666666666666, |
|
"eval_runtime": 245.6581, |
|
"eval_samples_per_second": 12.212, |
|
"eval_steps_per_second": 1.527, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.2605, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.9256666666666666, |
|
"eval_f1": 0.9292288162488098, |
|
"eval_loss": 0.3908889591693878, |
|
"eval_precision": 0.8867353119321624, |
|
"eval_recall": 0.976, |
|
"eval_runtime": 246.273, |
|
"eval_samples_per_second": 12.182, |
|
"eval_steps_per_second": 1.523, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.2565, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.9313333333333333, |
|
"eval_f1": 0.9340588988476314, |
|
"eval_loss": 0.39725252985954285, |
|
"eval_precision": 0.8983990147783252, |
|
"eval_recall": 0.9726666666666667, |
|
"eval_runtime": 245.9637, |
|
"eval_samples_per_second": 12.197, |
|
"eval_steps_per_second": 1.525, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.3269, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.9373333333333334, |
|
"eval_f1": 0.936141304347826, |
|
"eval_loss": 0.2685074806213379, |
|
"eval_precision": 0.9542936288088643, |
|
"eval_recall": 0.9186666666666666, |
|
"eval_runtime": 245.6322, |
|
"eval_samples_per_second": 12.213, |
|
"eval_steps_per_second": 1.527, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.2149, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.934, |
|
"eval_f1": 0.931582584657913, |
|
"eval_loss": 0.26127538084983826, |
|
"eval_precision": 0.9670014347202296, |
|
"eval_recall": 0.8986666666666666, |
|
"eval_runtime": 244.9705, |
|
"eval_samples_per_second": 12.246, |
|
"eval_steps_per_second": 1.531, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-05, |
|
"loss": 0.204, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.944, |
|
"eval_f1": 0.9432048681541583, |
|
"eval_loss": 0.22753384709358215, |
|
"eval_precision": 0.9567901234567902, |
|
"eval_recall": 0.93, |
|
"eval_runtime": 246.146, |
|
"eval_samples_per_second": 12.188, |
|
"eval_steps_per_second": 1.523, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9e-06, |
|
"loss": 0.165, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.9433333333333334, |
|
"eval_f1": 0.9422946367956553, |
|
"eval_loss": 0.22660386562347412, |
|
"eval_precision": 0.9598893499308437, |
|
"eval_recall": 0.9253333333333333, |
|
"eval_runtime": 244.7679, |
|
"eval_samples_per_second": 12.257, |
|
"eval_steps_per_second": 1.532, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.2662, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.938, |
|
"eval_f1": 0.9363449691991785, |
|
"eval_loss": 0.2438994199037552, |
|
"eval_precision": 0.9620253164556962, |
|
"eval_recall": 0.912, |
|
"eval_runtime": 246.2921, |
|
"eval_samples_per_second": 12.181, |
|
"eval_steps_per_second": 1.523, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.1704, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.9446666666666667, |
|
"eval_f1": 0.9449966865473823, |
|
"eval_loss": 0.24109722673892975, |
|
"eval_precision": 0.9393939393939394, |
|
"eval_recall": 0.9506666666666667, |
|
"eval_runtime": 244.6596, |
|
"eval_samples_per_second": 12.262, |
|
"eval_steps_per_second": 1.533, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6e-06, |
|
"loss": 0.1694, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.9423333333333334, |
|
"eval_f1": 0.9411364409663151, |
|
"eval_loss": 0.24637927114963531, |
|
"eval_precision": 0.9610840861709521, |
|
"eval_recall": 0.922, |
|
"eval_runtime": 246.6002, |
|
"eval_samples_per_second": 12.165, |
|
"eval_steps_per_second": 1.521, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2994, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.947, |
|
"eval_f1": 0.9469469469469469, |
|
"eval_loss": 0.22698351740837097, |
|
"eval_precision": 0.9478957915831663, |
|
"eval_recall": 0.946, |
|
"eval_runtime": 244.8716, |
|
"eval_samples_per_second": 12.251, |
|
"eval_steps_per_second": 1.531, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.1833, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.9443333333333334, |
|
"eval_f1": 0.9438655462184874, |
|
"eval_loss": 0.23095431923866272, |
|
"eval_precision": 0.951864406779661, |
|
"eval_recall": 0.936, |
|
"eval_runtime": 245.8383, |
|
"eval_samples_per_second": 12.203, |
|
"eval_steps_per_second": 1.525, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3e-06, |
|
"loss": 0.2646, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.9476666666666667, |
|
"eval_f1": 0.9484400656814449, |
|
"eval_loss": 0.27593639492988586, |
|
"eval_precision": 0.9346278317152104, |
|
"eval_recall": 0.9626666666666667, |
|
"eval_runtime": 244.6822, |
|
"eval_samples_per_second": 12.261, |
|
"eval_steps_per_second": 1.533, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.1927, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.949, |
|
"eval_f1": 0.9487437185929648, |
|
"eval_loss": 0.2200535088777542, |
|
"eval_precision": 0.9535353535353536, |
|
"eval_recall": 0.944, |
|
"eval_runtime": 245.0433, |
|
"eval_samples_per_second": 12.243, |
|
"eval_steps_per_second": 1.53, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.2153, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.948, |
|
"eval_f1": 0.9477911646586344, |
|
"eval_loss": 0.2146720141172409, |
|
"eval_precision": 0.9516129032258065, |
|
"eval_recall": 0.944, |
|
"eval_runtime": 246.1882, |
|
"eval_samples_per_second": 12.186, |
|
"eval_steps_per_second": 1.523, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.1213, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.948, |
|
"eval_f1": 0.9478957915831663, |
|
"eval_loss": 0.21715900301933289, |
|
"eval_precision": 0.9497991967871486, |
|
"eval_recall": 0.946, |
|
"eval_runtime": 245.0021, |
|
"eval_samples_per_second": 12.245, |
|
"eval_steps_per_second": 1.531, |
|
"step": 1750 |
|
} |
|
], |
|
"max_steps": 1750, |
|
"num_train_epochs": 1, |
|
"total_flos": 5.1979933974528e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|