diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,6148 @@ +{ + "best_metric": 0.9160090191657272, + "best_model_checkpoint": "swinv2-base-patch4-window12-192-22k-finetuned-lora-ISIC-2019/checkpoint-4875", + "epoch": 99.2, + "eval_steps": 500, + "global_step": 6200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.16, + "learning_rate": 0.0009983870967741936, + "loss": 1.4282, + "step": 10 + }, + { + "epoch": 0.32, + "learning_rate": 0.0009970967741935483, + "loss": 1.1082, + "step": 20 + }, + { + "epoch": 0.48, + "learning_rate": 0.0009954838709677419, + "loss": 1.0397, + "step": 30 + }, + { + "epoch": 0.64, + "learning_rate": 0.0009938709677419356, + "loss": 0.9258, + "step": 40 + }, + { + "epoch": 0.8, + "learning_rate": 0.0009922580645161292, + "loss": 0.8897, + "step": 50 + }, + { + "epoch": 0.96, + "learning_rate": 0.0009906451612903225, + "loss": 0.8488, + "step": 60 + }, + { + "epoch": 0.99, + "step": 62, + "train_accuracy": 0.718556119571348, + "train_f1": 0.6867379798585972, + "train_loss": 0.770628809928894, + "train_precision": 0.708956589572109, + "train_recall": 0.718556119571348, + "train_runtime": 291.5866, + "train_samples_per_second": 54.725, + "train_steps_per_second": 0.857 + }, + { + "epoch": 0.99, + "eval_accuracy": 0.7153325817361894, + "eval_f1": 0.6823083139813236, + "eval_loss": 0.7977813482284546, + "eval_precision": 0.7199162830832594, + "eval_recall": 0.7153325817361894, + "eval_runtime": 49.2679, + "eval_samples_per_second": 36.007, + "eval_steps_per_second": 0.568, + "step": 62 + }, + { + "epoch": 1.12, + "learning_rate": 0.000989032258064516, + "loss": 0.7836, + "step": 70 + }, + { + "epoch": 1.28, + "learning_rate": 0.0009874193548387096, + "loss": 0.772, + "step": 80 + }, + { + "epoch": 1.44, + "learning_rate": 0.0009858064516129032, + "loss": 0.8074, + "step": 90 + }, + { + "epoch": 1.6, + "learning_rate": 0.0009841935483870968, + "loss": 0.7912, + "step": 100 + }, + { + "epoch": 1.76, + "learning_rate": 0.0009825806451612903, + "loss": 0.7762, + "step": 110 + }, + { + "epoch": 1.92, + "learning_rate": 0.0009809677419354839, + "loss": 0.7795, + "step": 120 + }, + { + "epoch": 2.0, + "step": 125, + "train_accuracy": 0.7600426145265401, + "train_f1": 0.7540133314127565, + "train_loss": 0.6639156937599182, + "train_precision": 0.7611520386158022, + "train_recall": 0.7600426145265401, + "train_runtime": 254.6664, + "train_samples_per_second": 62.658, + "train_steps_per_second": 0.982 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.7497181510710259, + "eval_f1": 0.7388896367994308, + "eval_loss": 0.6961308717727661, + "eval_precision": 0.7541740918352317, + "eval_recall": 0.7497181510710259, + "eval_runtime": 48.7662, + "eval_samples_per_second": 36.378, + "eval_steps_per_second": 0.574, + "step": 125 + }, + { + "epoch": 2.08, + "learning_rate": 0.0009793548387096774, + "loss": 0.7586, + "step": 130 + }, + { + "epoch": 2.24, + "learning_rate": 0.000977741935483871, + "loss": 0.705, + "step": 140 + }, + { + "epoch": 2.4, + "learning_rate": 0.0009761290322580644, + "loss": 0.6983, + "step": 150 + }, + { + "epoch": 2.56, + "learning_rate": 0.0009745161290322581, + "loss": 0.6913, + "step": 160 + }, + { + "epoch": 2.72, + "learning_rate": 0.0009729032258064517, + "loss": 0.6849, + "step": 170 + }, + { + "epoch": 2.88, + "learning_rate": 0.0009714516129032258, + "loss": 0.7169, + "step": 180 + }, + { + "epoch": 2.99, + "step": 187, + "train_accuracy": 0.7776524409350128, + "train_f1": 0.7639537137185656, + "train_loss": 0.6073001027107239, + "train_precision": 0.7743141490761454, + "train_recall": 0.7776524409350128, + "train_runtime": 256.1493, + "train_samples_per_second": 62.296, + "train_steps_per_second": 0.976 + }, + { + "epoch": 2.99, + "eval_accuracy": 0.7717023675310034, + "eval_f1": 0.7547317483415474, + "eval_loss": 0.6364655494689941, + "eval_precision": 0.7658539052724858, + "eval_recall": 0.7717023675310034, + "eval_runtime": 48.4464, + "eval_samples_per_second": 36.618, + "eval_steps_per_second": 0.578, + "step": 187 + }, + { + "epoch": 3.04, + "learning_rate": 0.0009698387096774194, + "loss": 0.6428, + "step": 190 + }, + { + "epoch": 3.2, + "learning_rate": 0.0009682258064516129, + "loss": 0.6604, + "step": 200 + }, + { + "epoch": 3.36, + "learning_rate": 0.0009666129032258064, + "loss": 0.6531, + "step": 210 + }, + { + "epoch": 3.52, + "learning_rate": 0.000965, + "loss": 0.643, + "step": 220 + }, + { + "epoch": 3.68, + "learning_rate": 0.0009633870967741936, + "loss": 0.6918, + "step": 230 + }, + { + "epoch": 3.84, + "learning_rate": 0.0009617741935483872, + "loss": 0.6442, + "step": 240 + }, + { + "epoch": 4.0, + "learning_rate": 0.0009601612903225807, + "loss": 0.6576, + "step": 250 + }, + { + "epoch": 4.0, + "step": 250, + "train_accuracy": 0.7903114620542708, + "train_f1": 0.7760264280771675, + "train_loss": 0.5970289707183838, + "train_precision": 0.7953318203400003, + "train_recall": 0.7903114620542708, + "train_runtime": 256.7891, + "train_samples_per_second": 62.14, + "train_steps_per_second": 0.974 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.790304396843292, + "eval_f1": 0.7777020089235339, + "eval_loss": 0.6154965162277222, + "eval_precision": 0.7982692040791739, + "eval_recall": 0.790304396843292, + "eval_runtime": 46.9309, + "eval_samples_per_second": 37.8, + "eval_steps_per_second": 0.597, + "step": 250 + }, + { + "epoch": 4.16, + "learning_rate": 0.0009585483870967742, + "loss": 0.6104, + "step": 260 + }, + { + "epoch": 4.32, + "learning_rate": 0.0009569354838709677, + "loss": 0.5991, + "step": 270 + }, + { + "epoch": 4.48, + "learning_rate": 0.0009553225806451613, + "loss": 0.625, + "step": 280 + }, + { + "epoch": 4.64, + "learning_rate": 0.0009537096774193548, + "loss": 0.6223, + "step": 290 + }, + { + "epoch": 4.8, + "learning_rate": 0.0009520967741935485, + "loss": 0.6309, + "step": 300 + }, + { + "epoch": 4.96, + "learning_rate": 0.000950483870967742, + "loss": 0.6164, + "step": 310 + }, + { + "epoch": 4.99, + "step": 312, + "train_accuracy": 0.8017797831672621, + "train_f1": 0.7931150004910923, + "train_loss": 0.5476173758506775, + "train_precision": 0.7996150347194898, + "train_recall": 0.8017797831672621, + "train_runtime": 254.273, + "train_samples_per_second": 62.755, + "train_steps_per_second": 0.983 + }, + { + "epoch": 4.99, + "eval_accuracy": 0.7733934611048479, + "eval_f1": 0.7627200347192373, + "eval_loss": 0.5989590883255005, + "eval_precision": 0.7799215570339676, + "eval_recall": 0.7733934611048479, + "eval_runtime": 46.2691, + "eval_samples_per_second": 38.341, + "eval_steps_per_second": 0.605, + "step": 312 + }, + { + "epoch": 5.12, + "learning_rate": 0.0009488709677419355, + "loss": 0.6067, + "step": 320 + }, + { + "epoch": 5.28, + "learning_rate": 0.0009472580645161291, + "loss": 0.6144, + "step": 330 + }, + { + "epoch": 5.44, + "learning_rate": 0.0009456451612903226, + "loss": 0.5815, + "step": 340 + }, + { + "epoch": 5.6, + "learning_rate": 0.0009440322580645162, + "loss": 0.6036, + "step": 350 + }, + { + "epoch": 5.76, + "learning_rate": 0.0009424193548387096, + "loss": 0.5748, + "step": 360 + }, + { + "epoch": 5.92, + "learning_rate": 0.0009408064516129032, + "loss": 0.5779, + "step": 370 + }, + { + "epoch": 6.0, + "step": 375, + "train_accuracy": 0.8046625305508555, + "train_f1": 0.8003331690711911, + "train_loss": 0.5287690758705139, + "train_precision": 0.8066353914139762, + "train_recall": 0.8046625305508555, + "train_runtime": 254.1046, + "train_samples_per_second": 62.797, + "train_steps_per_second": 0.984 + }, + { + "epoch": 6.0, + "eval_accuracy": 0.7745208568207441, + "eval_f1": 0.7668632966865262, + "eval_loss": 0.602535605430603, + "eval_precision": 0.7770132133553053, + "eval_recall": 0.7745208568207441, + "eval_runtime": 38.0169, + "eval_samples_per_second": 46.663, + "eval_steps_per_second": 0.737, + "step": 375 + }, + { + "epoch": 6.08, + "learning_rate": 0.0009391935483870969, + "loss": 0.5808, + "step": 380 + }, + { + "epoch": 6.24, + "learning_rate": 0.0009375806451612904, + "loss": 0.5692, + "step": 390 + }, + { + "epoch": 6.4, + "learning_rate": 0.0009359677419354839, + "loss": 0.5525, + "step": 400 + }, + { + "epoch": 6.56, + "learning_rate": 0.0009343548387096774, + "loss": 0.562, + "step": 410 + }, + { + "epoch": 6.72, + "learning_rate": 0.000932741935483871, + "loss": 0.5858, + "step": 420 + }, + { + "epoch": 6.88, + "learning_rate": 0.0009311290322580645, + "loss": 0.5647, + "step": 430 + }, + { + "epoch": 6.99, + "step": 437, + "train_accuracy": 0.8250924359215391, + "train_f1": 0.8156580365652172, + "train_loss": 0.4726848304271698, + "train_precision": 0.8244531001171268, + "train_recall": 0.8250924359215391, + "train_runtime": 254.8694, + "train_samples_per_second": 62.609, + "train_steps_per_second": 0.981 + }, + { + "epoch": 6.99, + "eval_accuracy": 0.8094701240135288, + "eval_f1": 0.8000649092582361, + "eval_loss": 0.5391651391983032, + "eval_precision": 0.8082600629133598, + "eval_recall": 0.8094701240135288, + "eval_runtime": 33.5973, + "eval_samples_per_second": 52.802, + "eval_steps_per_second": 0.833, + "step": 437 + }, + { + "epoch": 7.04, + "learning_rate": 0.0009295161290322581, + "loss": 0.5795, + "step": 440 + }, + { + "epoch": 7.2, + "learning_rate": 0.0009279032258064515, + "loss": 0.535, + "step": 450 + }, + { + "epoch": 7.36, + "learning_rate": 0.0009262903225806451, + "loss": 0.5441, + "step": 460 + }, + { + "epoch": 7.52, + "learning_rate": 0.0009246774193548388, + "loss": 0.5329, + "step": 470 + }, + { + "epoch": 7.68, + "learning_rate": 0.0009230645161290323, + "loss": 0.5191, + "step": 480 + }, + { + "epoch": 7.84, + "learning_rate": 0.0009214516129032259, + "loss": 0.5428, + "step": 490 + }, + { + "epoch": 8.0, + "learning_rate": 0.0009198387096774193, + "loss": 0.5062, + "step": 500 + }, + { + "epoch": 8.0, + "step": 500, + "train_accuracy": 0.8309832675314909, + "train_f1": 0.8223807681274183, + "train_loss": 0.4595116674900055, + "train_precision": 0.8314452738097804, + "train_recall": 0.8309832675314909, + "train_runtime": 254.2706, + "train_samples_per_second": 62.756, + "train_steps_per_second": 0.983 + }, + { + "epoch": 8.0, + "eval_accuracy": 0.8015783540022547, + "eval_f1": 0.7895223836045016, + "eval_loss": 0.5510557293891907, + "eval_precision": 0.8031324191974503, + "eval_recall": 0.8015783540022547, + "eval_runtime": 33.1712, + "eval_samples_per_second": 53.48, + "eval_steps_per_second": 0.844, + "step": 500 + }, + { + "epoch": 8.16, + "learning_rate": 0.0009182258064516129, + "loss": 0.5424, + "step": 510 + }, + { + "epoch": 8.32, + "learning_rate": 0.0009166129032258065, + "loss": 0.5172, + "step": 520 + }, + { + "epoch": 8.48, + "learning_rate": 0.000915, + "loss": 0.5081, + "step": 530 + }, + { + "epoch": 8.64, + "learning_rate": 0.0009133870967741935, + "loss": 0.5357, + "step": 540 + }, + { + "epoch": 8.8, + "learning_rate": 0.0009117741935483871, + "loss": 0.5317, + "step": 550 + }, + { + "epoch": 8.96, + "learning_rate": 0.0009101612903225807, + "loss": 0.5479, + "step": 560 + }, + { + "epoch": 8.99, + "step": 562, + "train_accuracy": 0.8421382465375697, + "train_f1": 0.8356541787788663, + "train_loss": 0.4283278286457062, + "train_precision": 0.8418489980800979, + "train_recall": 0.8421382465375697, + "train_runtime": 253.325, + "train_samples_per_second": 62.99, + "train_steps_per_second": 0.987 + }, + { + "epoch": 8.99, + "eval_accuracy": 0.8156708004509583, + "eval_f1": 0.8099706186065314, + "eval_loss": 0.5151851773262024, + "eval_precision": 0.8175937941832333, + "eval_recall": 0.8156708004509583, + "eval_runtime": 29.3964, + "eval_samples_per_second": 60.347, + "eval_steps_per_second": 0.952, + "step": 562 + }, + { + "epoch": 9.12, + "learning_rate": 0.0009087096774193548, + "loss": 0.4922, + "step": 570 + }, + { + "epoch": 9.28, + "learning_rate": 0.0009070967741935484, + "loss": 0.4682, + "step": 580 + }, + { + "epoch": 9.44, + "learning_rate": 0.0009054838709677419, + "loss": 0.5159, + "step": 590 + }, + { + "epoch": 9.6, + "learning_rate": 0.0009038709677419356, + "loss": 0.5072, + "step": 600 + }, + { + "epoch": 9.76, + "learning_rate": 0.0009022580645161291, + "loss": 0.4946, + "step": 610 + }, + { + "epoch": 9.92, + "learning_rate": 0.0009006451612903226, + "loss": 0.504, + "step": 620 + }, + { + "epoch": 10.0, + "step": 625, + "train_accuracy": 0.8490317728896409, + "train_f1": 0.8425370562453873, + "train_loss": 0.4146420955657959, + "train_precision": 0.8499471633168426, + "train_recall": 0.8490317728896409, + "train_runtime": 252.5657, + "train_samples_per_second": 63.18, + "train_steps_per_second": 0.99 + }, + { + "epoch": 10.0, + "eval_accuracy": 0.8246899661781285, + "eval_f1": 0.8181338629292875, + "eval_loss": 0.4954419434070587, + "eval_precision": 0.8265610533827844, + "eval_recall": 0.8246899661781285, + "eval_runtime": 29.7136, + "eval_samples_per_second": 59.703, + "eval_steps_per_second": 0.942, + "step": 625 + }, + { + "epoch": 10.08, + "learning_rate": 0.0008990322580645162, + "loss": 0.4777, + "step": 630 + }, + { + "epoch": 10.24, + "learning_rate": 0.0008974193548387097, + "loss": 0.4595, + "step": 640 + }, + { + "epoch": 10.4, + "learning_rate": 0.0008958064516129032, + "loss": 0.4765, + "step": 650 + }, + { + "epoch": 10.56, + "learning_rate": 0.0008941935483870967, + "loss": 0.481, + "step": 660 + }, + { + "epoch": 10.72, + "learning_rate": 0.0008925806451612903, + "loss": 0.4936, + "step": 670 + }, + { + "epoch": 10.88, + "learning_rate": 0.0008909677419354839, + "loss": 0.4928, + "step": 680 + }, + { + "epoch": 10.99, + "step": 687, + "train_accuracy": 0.8662029203484364, + "train_f1": 0.8651095243578739, + "train_loss": 0.36719974875450134, + "train_precision": 0.8661939607708877, + "train_recall": 0.8662029203484364, + "train_runtime": 248.0205, + "train_samples_per_second": 64.337, + "train_steps_per_second": 1.008 + }, + { + "epoch": 10.99, + "eval_accuracy": 0.8325817361894025, + "eval_f1": 0.8330415565435833, + "eval_loss": 0.4721773862838745, + "eval_precision": 0.8382637561400605, + "eval_recall": 0.8325817361894025, + "eval_runtime": 29.1932, + "eval_samples_per_second": 60.768, + "eval_steps_per_second": 0.959, + "step": 687 + }, + { + "epoch": 11.04, + "learning_rate": 0.0008893548387096775, + "loss": 0.4791, + "step": 690 + }, + { + "epoch": 11.2, + "learning_rate": 0.000887741935483871, + "loss": 0.471, + "step": 700 + }, + { + "epoch": 11.36, + "learning_rate": 0.0008861290322580645, + "loss": 0.4514, + "step": 710 + }, + { + "epoch": 11.52, + "learning_rate": 0.0008845161290322581, + "loss": 0.4554, + "step": 720 + }, + { + "epoch": 11.68, + "learning_rate": 0.0008829032258064516, + "loss": 0.4778, + "step": 730 + }, + { + "epoch": 11.84, + "learning_rate": 0.0008812903225806452, + "loss": 0.4214, + "step": 740 + }, + { + "epoch": 12.0, + "learning_rate": 0.0008796774193548387, + "loss": 0.4555, + "step": 750 + }, + { + "epoch": 12.0, + "step": 750, + "train_accuracy": 0.8760418625054834, + "train_f1": 0.8735297151164703, + "train_loss": 0.341611385345459, + "train_precision": 0.8749284746900984, + "train_recall": 0.8760418625054834, + "train_runtime": 251.381, + "train_samples_per_second": 63.477, + "train_steps_per_second": 0.995 + }, + { + "epoch": 12.0, + "eval_accuracy": 0.846674182638106, + "eval_f1": 0.8431872563091386, + "eval_loss": 0.43833523988723755, + "eval_precision": 0.8434438125814474, + "eval_recall": 0.846674182638106, + "eval_runtime": 29.6753, + "eval_samples_per_second": 59.78, + "eval_steps_per_second": 0.944, + "step": 750 + }, + { + "epoch": 12.16, + "learning_rate": 0.0008780645161290322, + "loss": 0.4597, + "step": 760 + }, + { + "epoch": 12.32, + "learning_rate": 0.0008764516129032259, + "loss": 0.4253, + "step": 770 + }, + { + "epoch": 12.48, + "learning_rate": 0.0008748387096774194, + "loss": 0.473, + "step": 780 + }, + { + "epoch": 12.64, + "learning_rate": 0.0008732258064516129, + "loss": 0.4244, + "step": 790 + }, + { + "epoch": 12.8, + "learning_rate": 0.0008716129032258064, + "loss": 0.4602, + "step": 800 + }, + { + "epoch": 12.96, + "learning_rate": 0.00087, + "loss": 0.4345, + "step": 810 + }, + { + "epoch": 12.99, + "step": 812, + "train_accuracy": 0.8737857993357148, + "train_f1": 0.8699664840253412, + "train_loss": 0.3363898992538452, + "train_precision": 0.8723546877750733, + "train_recall": 0.8737857993357148, + "train_runtime": 254.2635, + "train_samples_per_second": 62.758, + "train_steps_per_second": 0.983 + }, + { + "epoch": 12.99, + "eval_accuracy": 0.8511837655016911, + "eval_f1": 0.8464360738395965, + "eval_loss": 0.44016385078430176, + "eval_precision": 0.8479714249543402, + "eval_recall": 0.8511837655016911, + "eval_runtime": 29.6817, + "eval_samples_per_second": 59.767, + "eval_steps_per_second": 0.943, + "step": 812 + }, + { + "epoch": 13.12, + "learning_rate": 0.0008683870967741936, + "loss": 0.4296, + "step": 820 + }, + { + "epoch": 13.28, + "learning_rate": 0.0008667741935483871, + "loss": 0.4024, + "step": 830 + }, + { + "epoch": 13.44, + "learning_rate": 0.0008651612903225806, + "loss": 0.4268, + "step": 840 + }, + { + "epoch": 13.6, + "learning_rate": 0.0008635483870967742, + "loss": 0.411, + "step": 850 + }, + { + "epoch": 13.76, + "learning_rate": 0.0008619354838709678, + "loss": 0.4179, + "step": 860 + }, + { + "epoch": 13.92, + "learning_rate": 0.0008603225806451614, + "loss": 0.4398, + "step": 870 + }, + { + "epoch": 14.0, + "step": 875, + "train_accuracy": 0.880553988845021, + "train_f1": 0.8770183301442244, + "train_loss": 0.3260194957256317, + "train_precision": 0.8807172403068199, + "train_recall": 0.880553988845021, + "train_runtime": 252.7172, + "train_samples_per_second": 63.142, + "train_steps_per_second": 0.989 + }, + { + "epoch": 14.0, + "eval_accuracy": 0.8494926719278467, + "eval_f1": 0.8469368583418073, + "eval_loss": 0.44426438212394714, + "eval_precision": 0.8497246468797232, + "eval_recall": 0.8494926719278467, + "eval_runtime": 29.5345, + "eval_samples_per_second": 60.065, + "eval_steps_per_second": 0.948, + "step": 875 + }, + { + "epoch": 14.08, + "learning_rate": 0.0008587096774193549, + "loss": 0.4153, + "step": 880 + }, + { + "epoch": 14.24, + "learning_rate": 0.0008570967741935484, + "loss": 0.4065, + "step": 890 + }, + { + "epoch": 14.4, + "learning_rate": 0.0008554838709677419, + "loss": 0.4045, + "step": 900 + }, + { + "epoch": 14.56, + "learning_rate": 0.0008538709677419355, + "loss": 0.3879, + "step": 910 + }, + { + "epoch": 14.72, + "learning_rate": 0.000852258064516129, + "loss": 0.4095, + "step": 920 + }, + { + "epoch": 14.88, + "learning_rate": 0.0008506451612903226, + "loss": 0.405, + "step": 930 + }, + { + "epoch": 14.99, + "step": 937, + "train_accuracy": 0.8919596415366297, + "train_f1": 0.8898750533747908, + "train_loss": 0.29893702268600464, + "train_precision": 0.8905204704311594, + "train_recall": 0.8919596415366297, + "train_runtime": 255.1579, + "train_samples_per_second": 62.538, + "train_steps_per_second": 0.98 + }, + { + "epoch": 14.99, + "eval_accuracy": 0.8568207440811725, + "eval_f1": 0.8543462911384269, + "eval_loss": 0.4096240699291229, + "eval_precision": 0.8552780691368613, + "eval_recall": 0.8568207440811725, + "eval_runtime": 29.6726, + "eval_samples_per_second": 59.786, + "eval_steps_per_second": 0.944, + "step": 937 + }, + { + "epoch": 15.04, + "learning_rate": 0.0008490322580645162, + "loss": 0.4047, + "step": 940 + }, + { + "epoch": 15.2, + "learning_rate": 0.0008474193548387097, + "loss": 0.4049, + "step": 950 + }, + { + "epoch": 15.36, + "learning_rate": 0.0008458064516129033, + "loss": 0.39, + "step": 960 + }, + { + "epoch": 15.52, + "learning_rate": 0.0008441935483870968, + "loss": 0.4253, + "step": 970 + }, + { + "epoch": 15.68, + "learning_rate": 0.0008425806451612903, + "loss": 0.3707, + "step": 980 + }, + { + "epoch": 15.84, + "learning_rate": 0.0008409677419354838, + "loss": 0.389, + "step": 990 + }, + { + "epoch": 16.0, + "learning_rate": 0.0008393548387096774, + "loss": 0.4069, + "step": 1000 + }, + { + "epoch": 16.0, + "step": 1000, + "train_accuracy": 0.8934636836498089, + "train_f1": 0.8909693605701321, + "train_loss": 0.2904220521450043, + "train_precision": 0.893519044249712, + "train_recall": 0.8934636836498089, + "train_runtime": 254.2734, + "train_samples_per_second": 62.755, + "train_steps_per_second": 0.983 + }, + { + "epoch": 16.0, + "eval_accuracy": 0.8500563697857948, + "eval_f1": 0.8452271271553625, + "eval_loss": 0.4389975965023041, + "eval_precision": 0.851487739267586, + "eval_recall": 0.8500563697857948, + "eval_runtime": 29.5522, + "eval_samples_per_second": 60.029, + "eval_steps_per_second": 0.947, + "step": 1000 + }, + { + "epoch": 16.16, + "learning_rate": 0.0008377419354838711, + "loss": 0.389, + "step": 1010 + }, + { + "epoch": 16.32, + "learning_rate": 0.0008361290322580646, + "loss": 0.3682, + "step": 1020 + }, + { + "epoch": 16.48, + "learning_rate": 0.0008345161290322581, + "loss": 0.3746, + "step": 1030 + }, + { + "epoch": 16.64, + "learning_rate": 0.0008329032258064516, + "loss": 0.3817, + "step": 1040 + }, + { + "epoch": 16.8, + "learning_rate": 0.0008312903225806452, + "loss": 0.3652, + "step": 1050 + }, + { + "epoch": 16.96, + "learning_rate": 0.0008296774193548387, + "loss": 0.3774, + "step": 1060 + }, + { + "epoch": 16.99, + "step": 1062, + "train_accuracy": 0.9058093626621545, + "train_f1": 0.9046539806661633, + "train_loss": 0.2608683109283447, + "train_precision": 0.9049170910236276, + "train_recall": 0.9058093626621545, + "train_runtime": 253.2237, + "train_samples_per_second": 63.015, + "train_steps_per_second": 0.987 + }, + { + "epoch": 16.99, + "eval_accuracy": 0.8579481397970687, + "eval_f1": 0.8563523858127897, + "eval_loss": 0.415022075176239, + "eval_precision": 0.8576310002165511, + "eval_recall": 0.8579481397970687, + "eval_runtime": 29.5963, + "eval_samples_per_second": 59.94, + "eval_steps_per_second": 0.946, + "step": 1062 + }, + { + "epoch": 17.12, + "learning_rate": 0.0008280645161290323, + "loss": 0.3537, + "step": 1070 + }, + { + "epoch": 17.28, + "learning_rate": 0.0008264516129032257, + "loss": 0.3743, + "step": 1080 + }, + { + "epoch": 17.44, + "learning_rate": 0.0008248387096774194, + "loss": 0.3648, + "step": 1090 + }, + { + "epoch": 17.6, + "learning_rate": 0.000823225806451613, + "loss": 0.3659, + "step": 1100 + }, + { + "epoch": 17.76, + "learning_rate": 0.0008216129032258065, + "loss": 0.3713, + "step": 1110 + }, + { + "epoch": 17.92, + "learning_rate": 0.00082, + "loss": 0.3631, + "step": 1120 + }, + { + "epoch": 18.0, + "step": 1125, + "train_accuracy": 0.9117628626934887, + "train_f1": 0.9111752276141204, + "train_loss": 0.24888941645622253, + "train_precision": 0.9116531142476182, + "train_recall": 0.9117628626934887, + "train_runtime": 253.2878, + "train_samples_per_second": 62.999, + "train_steps_per_second": 0.987 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.85456595264938, + "eval_f1": 0.8546917151279523, + "eval_loss": 0.42011961340904236, + "eval_precision": 0.858590913110437, + "eval_recall": 0.85456595264938, + "eval_runtime": 29.5881, + "eval_samples_per_second": 59.956, + "eval_steps_per_second": 0.946, + "step": 1125 + }, + { + "epoch": 18.08, + "learning_rate": 0.0008183870967741935, + "loss": 0.3537, + "step": 1130 + }, + { + "epoch": 18.24, + "learning_rate": 0.0008167741935483871, + "loss": 0.3214, + "step": 1140 + }, + { + "epoch": 18.4, + "learning_rate": 0.0008151612903225807, + "loss": 0.3464, + "step": 1150 + }, + { + "epoch": 18.56, + "learning_rate": 0.0008135483870967742, + "loss": 0.3503, + "step": 1160 + }, + { + "epoch": 18.72, + "learning_rate": 0.0008119354838709677, + "loss": 0.3745, + "step": 1170 + }, + { + "epoch": 18.88, + "learning_rate": 0.0008103225806451613, + "loss": 0.3458, + "step": 1180 + }, + { + "epoch": 18.99, + "step": 1187, + "train_accuracy": 0.912326878485931, + "train_f1": 0.9113314150149505, + "train_loss": 0.24995951354503632, + "train_precision": 0.9115719864017833, + "train_recall": 0.912326878485931, + "train_runtime": 254.6215, + "train_samples_per_second": 62.669, + "train_steps_per_second": 0.982 + }, + { + "epoch": 18.99, + "eval_accuracy": 0.8562570462232244, + "eval_f1": 0.8542249820828076, + "eval_loss": 0.40477874875068665, + "eval_precision": 0.8544614619611243, + "eval_recall": 0.8562570462232244, + "eval_runtime": 29.856, + "eval_samples_per_second": 59.418, + "eval_steps_per_second": 0.938, + "step": 1187 + }, + { + "epoch": 19.04, + "learning_rate": 0.0008087096774193549, + "loss": 0.3953, + "step": 1190 + }, + { + "epoch": 19.2, + "learning_rate": 0.0008070967741935484, + "loss": 0.3386, + "step": 1200 + }, + { + "epoch": 19.36, + "learning_rate": 0.000805483870967742, + "loss": 0.3322, + "step": 1210 + }, + { + "epoch": 19.52, + "learning_rate": 0.0008038709677419355, + "loss": 0.3425, + "step": 1220 + }, + { + "epoch": 19.68, + "learning_rate": 0.000802258064516129, + "loss": 0.3448, + "step": 1230 + }, + { + "epoch": 19.84, + "learning_rate": 0.0008006451612903226, + "loss": 0.3534, + "step": 1240 + }, + { + "epoch": 20.0, + "learning_rate": 0.0007990322580645161, + "loss": 0.3361, + "step": 1250 + }, + { + "epoch": 20.0, + "step": 1250, + "train_accuracy": 0.9059346995049194, + "train_f1": 0.9033631776657957, + "train_loss": 0.2528255581855774, + "train_precision": 0.9064546977324616, + "train_recall": 0.9059346995049194, + "train_runtime": 252.7613, + "train_samples_per_second": 63.131, + "train_steps_per_second": 0.989 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.8596392333709132, + "eval_f1": 0.8566333454163209, + "eval_loss": 0.4370974004268646, + "eval_precision": 0.8585278408293291, + "eval_recall": 0.8596392333709132, + "eval_runtime": 29.8096, + "eval_samples_per_second": 59.511, + "eval_steps_per_second": 0.939, + "step": 1250 + }, + { + "epoch": 20.16, + "learning_rate": 0.0007974193548387097, + "loss": 0.3154, + "step": 1260 + }, + { + "epoch": 20.32, + "learning_rate": 0.0007958064516129032, + "loss": 0.3478, + "step": 1270 + }, + { + "epoch": 20.48, + "learning_rate": 0.0007941935483870968, + "loss": 0.3385, + "step": 1280 + }, + { + "epoch": 20.64, + "learning_rate": 0.0007925806451612904, + "loss": 0.3327, + "step": 1290 + }, + { + "epoch": 20.8, + "learning_rate": 0.0007909677419354839, + "loss": 0.3398, + "step": 1300 + }, + { + "epoch": 20.96, + "learning_rate": 0.0007893548387096774, + "loss": 0.3386, + "step": 1310 + }, + { + "epoch": 20.99, + "step": 1312, + "train_accuracy": 0.9205991101084163, + "train_f1": 0.9202297733826367, + "train_loss": 0.22639435529708862, + "train_precision": 0.9204915599848654, + "train_recall": 0.9205991101084163, + "train_runtime": 253.7128, + "train_samples_per_second": 62.894, + "train_steps_per_second": 0.985 + }, + { + "epoch": 20.99, + "eval_accuracy": 0.8686583990980834, + "eval_f1": 0.8680154821185954, + "eval_loss": 0.39551448822021484, + "eval_precision": 0.8689593382775722, + "eval_recall": 0.8686583990980834, + "eval_runtime": 29.773, + "eval_samples_per_second": 59.584, + "eval_steps_per_second": 0.94, + "step": 1312 + }, + { + "epoch": 21.12, + "learning_rate": 0.0007877419354838709, + "loss": 0.3038, + "step": 1320 + }, + { + "epoch": 21.28, + "learning_rate": 0.0007861290322580645, + "loss": 0.3157, + "step": 1330 + }, + { + "epoch": 21.44, + "learning_rate": 0.0007845161290322582, + "loss": 0.318, + "step": 1340 + }, + { + "epoch": 21.6, + "learning_rate": 0.0007829032258064517, + "loss": 0.2993, + "step": 1350 + }, + { + "epoch": 21.76, + "learning_rate": 0.0007812903225806452, + "loss": 0.3185, + "step": 1360 + }, + { + "epoch": 21.92, + "learning_rate": 0.0007796774193548387, + "loss": 0.3091, + "step": 1370 + }, + { + "epoch": 22.0, + "step": 1375, + "train_accuracy": 0.9207871153725637, + "train_f1": 0.919568201335574, + "train_loss": 0.21673625707626343, + "train_precision": 0.9203664780549666, + "train_recall": 0.9207871153725637, + "train_runtime": 249.1604, + "train_samples_per_second": 64.043, + "train_steps_per_second": 1.003 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.8680947012401353, + "eval_f1": 0.8657724581221397, + "eval_loss": 0.4277941584587097, + "eval_precision": 0.8671942484944011, + "eval_recall": 0.8680947012401353, + "eval_runtime": 29.1495, + "eval_samples_per_second": 60.859, + "eval_steps_per_second": 0.961, + "step": 1375 + }, + { + "epoch": 22.08, + "learning_rate": 0.0007780645161290323, + "loss": 0.3109, + "step": 1380 + }, + { + "epoch": 22.24, + "learning_rate": 0.0007764516129032258, + "loss": 0.3082, + "step": 1390 + }, + { + "epoch": 22.4, + "learning_rate": 0.0007748387096774193, + "loss": 0.3133, + "step": 1400 + }, + { + "epoch": 22.56, + "learning_rate": 0.0007732258064516128, + "loss": 0.3217, + "step": 1410 + }, + { + "epoch": 22.72, + "learning_rate": 0.0007716129032258065, + "loss": 0.3145, + "step": 1420 + }, + { + "epoch": 22.88, + "learning_rate": 0.0007700000000000001, + "loss": 0.3081, + "step": 1430 + }, + { + "epoch": 22.99, + "step": 1437, + "train_accuracy": 0.9301247101585511, + "train_f1": 0.9298726796945993, + "train_loss": 0.2020701915025711, + "train_precision": 0.9301061473852973, + "train_recall": 0.9301247101585511, + "train_runtime": 248.5533, + "train_samples_per_second": 64.2, + "train_steps_per_second": 1.006 + }, + { + "epoch": 22.99, + "eval_accuracy": 0.8675310033821871, + "eval_f1": 0.8662116604826631, + "eval_loss": 0.39544782042503357, + "eval_precision": 0.866086748268126, + "eval_recall": 0.8675310033821871, + "eval_runtime": 29.2034, + "eval_samples_per_second": 60.746, + "eval_steps_per_second": 0.959, + "step": 1437 + }, + { + "epoch": 23.04, + "learning_rate": 0.0007683870967741936, + "loss": 0.3357, + "step": 1440 + }, + { + "epoch": 23.2, + "learning_rate": 0.0007667741935483871, + "loss": 0.2981, + "step": 1450 + }, + { + "epoch": 23.36, + "learning_rate": 0.0007651612903225806, + "loss": 0.2823, + "step": 1460 + }, + { + "epoch": 23.52, + "learning_rate": 0.0007635483870967742, + "loss": 0.3036, + "step": 1470 + }, + { + "epoch": 23.68, + "learning_rate": 0.0007619354838709678, + "loss": 0.3097, + "step": 1480 + }, + { + "epoch": 23.84, + "learning_rate": 0.0007603225806451613, + "loss": 0.3004, + "step": 1490 + }, + { + "epoch": 24.0, + "learning_rate": 0.0007587096774193549, + "loss": 0.3031, + "step": 1500 + }, + { + "epoch": 24.0, + "step": 1500, + "train_accuracy": 0.9242965469699819, + "train_f1": 0.9232184701149497, + "train_loss": 0.20474377274513245, + "train_precision": 0.9253522346156541, + "train_recall": 0.9242965469699819, + "train_runtime": 248.6452, + "train_samples_per_second": 64.176, + "train_steps_per_second": 1.005 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.8720405862457723, + "eval_f1": 0.8716583234363133, + "eval_loss": 0.42239370942115784, + "eval_precision": 0.8734092383360484, + "eval_recall": 0.8720405862457723, + "eval_runtime": 29.0555, + "eval_samples_per_second": 61.055, + "eval_steps_per_second": 0.964, + "step": 1500 + }, + { + "epoch": 24.16, + "learning_rate": 0.0007570967741935484, + "loss": 0.2858, + "step": 1510 + }, + { + "epoch": 24.32, + "learning_rate": 0.000755483870967742, + "loss": 0.2999, + "step": 1520 + }, + { + "epoch": 24.48, + "learning_rate": 0.0007538709677419355, + "loss": 0.3135, + "step": 1530 + }, + { + "epoch": 24.64, + "learning_rate": 0.000752258064516129, + "loss": 0.2951, + "step": 1540 + }, + { + "epoch": 24.8, + "learning_rate": 0.0007506451612903225, + "loss": 0.3136, + "step": 1550 + }, + { + "epoch": 24.96, + "learning_rate": 0.0007490322580645161, + "loss": 0.2918, + "step": 1560 + }, + { + "epoch": 24.99, + "step": 1562, + "train_accuracy": 0.9315660838503478, + "train_f1": 0.9309767971055543, + "train_loss": 0.1887647807598114, + "train_precision": 0.9311524146378474, + "train_recall": 0.9315660838503478, + "train_runtime": 249.2905, + "train_samples_per_second": 64.01, + "train_steps_per_second": 1.003 + }, + { + "epoch": 24.99, + "eval_accuracy": 0.8680947012401353, + "eval_f1": 0.8669054461224185, + "eval_loss": 0.43037477135658264, + "eval_precision": 0.8673301666844889, + "eval_recall": 0.8680947012401353, + "eval_runtime": 29.0965, + "eval_samples_per_second": 60.97, + "eval_steps_per_second": 0.962, + "step": 1562 + }, + { + "epoch": 25.12, + "learning_rate": 0.0007474193548387097, + "loss": 0.2943, + "step": 1570 + }, + { + "epoch": 25.28, + "learning_rate": 0.0007458064516129033, + "loss": 0.2796, + "step": 1580 + }, + { + "epoch": 25.44, + "learning_rate": 0.0007441935483870968, + "loss": 0.2712, + "step": 1590 + }, + { + "epoch": 25.6, + "learning_rate": 0.0007425806451612903, + "loss": 0.2862, + "step": 1600 + }, + { + "epoch": 25.76, + "learning_rate": 0.0007409677419354839, + "loss": 0.2833, + "step": 1610 + }, + { + "epoch": 25.92, + "learning_rate": 0.0007393548387096775, + "loss": 0.2594, + "step": 1620 + }, + { + "epoch": 26.0, + "step": 1625, + "train_accuracy": 0.934824841762236, + "train_f1": 0.9340766757397655, + "train_loss": 0.18092262744903564, + "train_precision": 0.9349816666427498, + "train_recall": 0.934824841762236, + "train_runtime": 249.0812, + "train_samples_per_second": 64.063, + "train_steps_per_second": 1.004 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.8776775648252536, + "eval_f1": 0.8766912677805229, + "eval_loss": 0.421634703874588, + "eval_precision": 0.8774332235361698, + "eval_recall": 0.8776775648252536, + "eval_runtime": 30.0967, + "eval_samples_per_second": 58.943, + "eval_steps_per_second": 0.93, + "step": 1625 + }, + { + "epoch": 26.08, + "learning_rate": 0.000737741935483871, + "loss": 0.2729, + "step": 1630 + }, + { + "epoch": 26.24, + "learning_rate": 0.0007361290322580645, + "loss": 0.2801, + "step": 1640 + }, + { + "epoch": 26.4, + "learning_rate": 0.000734516129032258, + "loss": 0.2653, + "step": 1650 + }, + { + "epoch": 26.56, + "learning_rate": 0.0007329032258064517, + "loss": 0.3044, + "step": 1660 + }, + { + "epoch": 26.72, + "learning_rate": 0.0007312903225806452, + "loss": 0.2751, + "step": 1670 + }, + { + "epoch": 26.88, + "learning_rate": 0.0007296774193548388, + "loss": 0.3028, + "step": 1680 + }, + { + "epoch": 26.99, + "step": 1687, + "train_accuracy": 0.93670489440371, + "train_f1": 0.9361197667432394, + "train_loss": 0.1785627156496048, + "train_precision": 0.9365292015778245, + "train_recall": 0.93670489440371, + "train_runtime": 255.0897, + "train_samples_per_second": 62.554, + "train_steps_per_second": 0.98 + }, + { + "epoch": 26.99, + "eval_accuracy": 0.8810597519729425, + "eval_f1": 0.8793922695566861, + "eval_loss": 0.40421751141548157, + "eval_precision": 0.8798634255905717, + "eval_recall": 0.8810597519729425, + "eval_runtime": 45.5693, + "eval_samples_per_second": 38.93, + "eval_steps_per_second": 0.614, + "step": 1687 + }, + { + "epoch": 27.04, + "learning_rate": 0.0007280645161290323, + "loss": 0.2499, + "step": 1690 + }, + { + "epoch": 27.2, + "learning_rate": 0.0007264516129032258, + "loss": 0.2792, + "step": 1700 + }, + { + "epoch": 27.36, + "learning_rate": 0.0007248387096774194, + "loss": 0.2792, + "step": 1710 + }, + { + "epoch": 27.52, + "learning_rate": 0.0007232258064516129, + "loss": 0.2865, + "step": 1720 + }, + { + "epoch": 27.68, + "learning_rate": 0.0007216129032258064, + "loss": 0.2845, + "step": 1730 + }, + { + "epoch": 27.84, + "learning_rate": 0.0007199999999999999, + "loss": 0.2861, + "step": 1740 + }, + { + "epoch": 28.0, + "learning_rate": 0.0007183870967741936, + "loss": 0.2758, + "step": 1750 + }, + { + "epoch": 28.0, + "step": 1750, + "train_accuracy": 0.938459610202419, + "train_f1": 0.9377525981628592, + "train_loss": 0.1685272455215454, + "train_precision": 0.9389554252034343, + "train_recall": 0.938459610202419, + "train_runtime": 253.2422, + "train_samples_per_second": 63.011, + "train_steps_per_second": 0.987 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.8680947012401353, + "eval_f1": 0.8681614791663694, + "eval_loss": 0.41954031586647034, + "eval_precision": 0.8691594298818118, + "eval_recall": 0.8680947012401353, + "eval_runtime": 47.5593, + "eval_samples_per_second": 37.301, + "eval_steps_per_second": 0.589, + "step": 1750 + }, + { + "epoch": 28.16, + "learning_rate": 0.0007167741935483872, + "loss": 0.2659, + "step": 1760 + }, + { + "epoch": 28.32, + "learning_rate": 0.0007151612903225807, + "loss": 0.2522, + "step": 1770 + }, + { + "epoch": 28.48, + "learning_rate": 0.0007135483870967742, + "loss": 0.2755, + "step": 1780 + }, + { + "epoch": 28.64, + "learning_rate": 0.0007119354838709677, + "loss": 0.2736, + "step": 1790 + }, + { + "epoch": 28.8, + "learning_rate": 0.0007103225806451613, + "loss": 0.2847, + "step": 1800 + }, + { + "epoch": 28.96, + "learning_rate": 0.0007087096774193548, + "loss": 0.2833, + "step": 1810 + }, + { + "epoch": 28.99, + "step": 1812, + "train_accuracy": 0.9406530049508053, + "train_f1": 0.9397675181987466, + "train_loss": 0.16751761734485626, + "train_precision": 0.9410854777530413, + "train_recall": 0.9406530049508053, + "train_runtime": 254.9572, + "train_samples_per_second": 62.587, + "train_steps_per_second": 0.981 + }, + { + "epoch": 28.99, + "eval_accuracy": 0.8759864712514093, + "eval_f1": 0.872766620485809, + "eval_loss": 0.37792226672172546, + "eval_precision": 0.8741418008848874, + "eval_recall": 0.8759864712514093, + "eval_runtime": 41.0022, + "eval_samples_per_second": 43.266, + "eval_steps_per_second": 0.683, + "step": 1812 + }, + { + "epoch": 29.12, + "learning_rate": 0.0007070967741935484, + "loss": 0.2487, + "step": 1820 + }, + { + "epoch": 29.28, + "learning_rate": 0.000705483870967742, + "loss": 0.2712, + "step": 1830 + }, + { + "epoch": 29.44, + "learning_rate": 0.0007038709677419355, + "loss": 0.243, + "step": 1840 + }, + { + "epoch": 29.6, + "learning_rate": 0.0007022580645161291, + "loss": 0.2438, + "step": 1850 + }, + { + "epoch": 29.76, + "learning_rate": 0.0007006451612903226, + "loss": 0.2834, + "step": 1860 + }, + { + "epoch": 29.92, + "learning_rate": 0.0006990322580645161, + "loss": 0.2414, + "step": 1870 + }, + { + "epoch": 30.0, + "step": 1875, + "train_accuracy": 0.9470451839318168, + "train_f1": 0.9467087754155913, + "train_loss": 0.15353631973266602, + "train_precision": 0.9468910650190296, + "train_recall": 0.9470451839318168, + "train_runtime": 255.1175, + "train_samples_per_second": 62.548, + "train_steps_per_second": 0.98 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.8799323562570462, + "eval_f1": 0.8785953814489085, + "eval_loss": 0.4351659119129181, + "eval_precision": 0.8796722516525893, + "eval_recall": 0.8799323562570462, + "eval_runtime": 38.7284, + "eval_samples_per_second": 45.806, + "eval_steps_per_second": 0.723, + "step": 1875 + }, + { + "epoch": 30.08, + "learning_rate": 0.0006974193548387096, + "loss": 0.2458, + "step": 1880 + }, + { + "epoch": 30.24, + "learning_rate": 0.0006958064516129032, + "loss": 0.2436, + "step": 1890 + }, + { + "epoch": 30.4, + "learning_rate": 0.0006941935483870968, + "loss": 0.2279, + "step": 1900 + }, + { + "epoch": 30.56, + "learning_rate": 0.0006925806451612904, + "loss": 0.2496, + "step": 1910 + }, + { + "epoch": 30.72, + "learning_rate": 0.0006909677419354839, + "loss": 0.2527, + "step": 1920 + }, + { + "epoch": 30.88, + "learning_rate": 0.0006893548387096774, + "loss": 0.2508, + "step": 1930 + }, + { + "epoch": 30.99, + "step": 1937, + "train_accuracy": 0.945729147082785, + "train_f1": 0.9452594948649355, + "train_loss": 0.14930781722068787, + "train_precision": 0.9455852542228912, + "train_recall": 0.945729147082785, + "train_runtime": 254.6951, + "train_samples_per_second": 62.651, + "train_steps_per_second": 0.982 + }, + { + "epoch": 30.99, + "eval_accuracy": 0.887260428410372, + "eval_f1": 0.8853880807732613, + "eval_loss": 0.418369859457016, + "eval_precision": 0.8856923487657594, + "eval_recall": 0.887260428410372, + "eval_runtime": 39.5926, + "eval_samples_per_second": 44.806, + "eval_steps_per_second": 0.707, + "step": 1937 + }, + { + "epoch": 31.04, + "learning_rate": 0.000687741935483871, + "loss": 0.2344, + "step": 1940 + }, + { + "epoch": 31.2, + "learning_rate": 0.0006861290322580645, + "loss": 0.2417, + "step": 1950 + }, + { + "epoch": 31.36, + "learning_rate": 0.0006845161290322581, + "loss": 0.252, + "step": 1960 + }, + { + "epoch": 31.52, + "learning_rate": 0.0006829032258064516, + "loss": 0.2465, + "step": 1970 + }, + { + "epoch": 31.68, + "learning_rate": 0.0006812903225806451, + "loss": 0.2405, + "step": 1980 + }, + { + "epoch": 31.84, + "learning_rate": 0.0006796774193548388, + "loss": 0.2275, + "step": 1990 + }, + { + "epoch": 32.0, + "learning_rate": 0.0006780645161290323, + "loss": 0.2509, + "step": 2000 + }, + { + "epoch": 32.0, + "step": 2000, + "train_accuracy": 0.9509932944789121, + "train_f1": 0.950617708879211, + "train_loss": 0.14158745110034943, + "train_precision": 0.9510854186462011, + "train_recall": 0.9509932944789121, + "train_runtime": 255.2742, + "train_samples_per_second": 62.509, + "train_steps_per_second": 0.979 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.8889515219842165, + "eval_f1": 0.8871024769373068, + "eval_loss": 0.41492128372192383, + "eval_precision": 0.8891530304505318, + "eval_recall": 0.8889515219842165, + "eval_runtime": 35.6073, + "eval_samples_per_second": 49.821, + "eval_steps_per_second": 0.786, + "step": 2000 + }, + { + "epoch": 32.16, + "learning_rate": 0.0006764516129032258, + "loss": 0.22, + "step": 2010 + }, + { + "epoch": 32.32, + "learning_rate": 0.0006748387096774193, + "loss": 0.243, + "step": 2020 + }, + { + "epoch": 32.48, + "learning_rate": 0.0006732258064516129, + "loss": 0.2465, + "step": 2030 + }, + { + "epoch": 32.64, + "learning_rate": 0.0006716129032258065, + "loss": 0.2352, + "step": 2040 + }, + { + "epoch": 32.8, + "learning_rate": 0.00067, + "loss": 0.2314, + "step": 2050 + }, + { + "epoch": 32.96, + "learning_rate": 0.0006683870967741935, + "loss": 0.2425, + "step": 2060 + }, + { + "epoch": 32.99, + "step": 2062, + "train_accuracy": 0.9521839944851789, + "train_f1": 0.9519406193802002, + "train_loss": 0.13532106578350067, + "train_precision": 0.9520376150741188, + "train_recall": 0.9521839944851789, + "train_runtime": 253.863, + "train_samples_per_second": 62.857, + "train_steps_per_second": 0.985 + }, + { + "epoch": 32.99, + "eval_accuracy": 0.8900789177001127, + "eval_f1": 0.8892207946311137, + "eval_loss": 0.41321179270744324, + "eval_precision": 0.889613661345136, + "eval_recall": 0.8900789177001127, + "eval_runtime": 34.1249, + "eval_samples_per_second": 51.985, + "eval_steps_per_second": 0.821, + "step": 2062 + }, + { + "epoch": 33.12, + "learning_rate": 0.0006667741935483871, + "loss": 0.2533, + "step": 2070 + }, + { + "epoch": 33.28, + "learning_rate": 0.0006651612903225807, + "loss": 0.2495, + "step": 2080 + }, + { + "epoch": 33.44, + "learning_rate": 0.0006635483870967743, + "loss": 0.2216, + "step": 2090 + }, + { + "epoch": 33.6, + "learning_rate": 0.0006619354838709678, + "loss": 0.2334, + "step": 2100 + }, + { + "epoch": 33.76, + "learning_rate": 0.0006603225806451613, + "loss": 0.209, + "step": 2110 + }, + { + "epoch": 33.92, + "learning_rate": 0.0006587096774193548, + "loss": 0.2319, + "step": 2120 + }, + { + "epoch": 34.0, + "step": 2125, + "train_accuracy": 0.9527480102776211, + "train_f1": 0.9523272165039456, + "train_loss": 0.13454996049404144, + "train_precision": 0.9527286937272258, + "train_recall": 0.9527480102776211, + "train_runtime": 253.9192, + "train_samples_per_second": 62.843, + "train_steps_per_second": 0.985 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.8906426155580609, + "eval_f1": 0.8896475365939425, + "eval_loss": 0.3986554443836212, + "eval_precision": 0.889369678049395, + "eval_recall": 0.8906426155580609, + "eval_runtime": 32.2322, + "eval_samples_per_second": 55.038, + "eval_steps_per_second": 0.869, + "step": 2125 + }, + { + "epoch": 34.08, + "learning_rate": 0.0006570967741935484, + "loss": 0.2142, + "step": 2130 + }, + { + "epoch": 34.24, + "learning_rate": 0.0006554838709677419, + "loss": 0.195, + "step": 2140 + }, + { + "epoch": 34.4, + "learning_rate": 0.0006538709677419356, + "loss": 0.2257, + "step": 2150 + }, + { + "epoch": 34.56, + "learning_rate": 0.000652258064516129, + "loss": 0.2191, + "step": 2160 + }, + { + "epoch": 34.72, + "learning_rate": 0.0006506451612903226, + "loss": 0.2472, + "step": 2170 + }, + { + "epoch": 34.88, + "learning_rate": 0.0006490322580645162, + "loss": 0.256, + "step": 2180 + }, + { + "epoch": 34.99, + "step": 2187, + "train_accuracy": 0.956633452403334, + "train_f1": 0.9563227521739598, + "train_loss": 0.13102850317955017, + "train_precision": 0.9566100366343153, + "train_recall": 0.956633452403334, + "train_runtime": 255.8713, + "train_samples_per_second": 62.363, + "train_steps_per_second": 0.977 + }, + { + "epoch": 34.99, + "eval_accuracy": 0.8878241262683202, + "eval_f1": 0.8863562863477997, + "eval_loss": 0.40525099635124207, + "eval_precision": 0.8863594064224781, + "eval_recall": 0.8878241262683202, + "eval_runtime": 29.2139, + "eval_samples_per_second": 60.725, + "eval_steps_per_second": 0.958, + "step": 2187 + }, + { + "epoch": 35.04, + "learning_rate": 0.0006474193548387097, + "loss": 0.2196, + "step": 2190 + }, + { + "epoch": 35.2, + "learning_rate": 0.0006458064516129032, + "loss": 0.208, + "step": 2200 + }, + { + "epoch": 35.36, + "learning_rate": 0.0006441935483870967, + "loss": 0.2125, + "step": 2210 + }, + { + "epoch": 35.52, + "learning_rate": 0.0006425806451612903, + "loss": 0.234, + "step": 2220 + }, + { + "epoch": 35.68, + "learning_rate": 0.0006409677419354839, + "loss": 0.2161, + "step": 2230 + }, + { + "epoch": 35.84, + "learning_rate": 0.0006393548387096775, + "loss": 0.2384, + "step": 2240 + }, + { + "epoch": 36.0, + "learning_rate": 0.000637741935483871, + "loss": 0.2005, + "step": 2250 + }, + { + "epoch": 36.0, + "step": 2250, + "train_accuracy": 0.9563201102964216, + "train_f1": 0.9559487475587907, + "train_loss": 0.1280374825000763, + "train_precision": 0.95649817681943, + "train_recall": 0.9563201102964216, + "train_runtime": 249.7425, + "train_samples_per_second": 63.894, + "train_steps_per_second": 1.001 + }, + { + "epoch": 36.0, + "eval_accuracy": 0.8906426155580609, + "eval_f1": 0.8889303745678712, + "eval_loss": 0.42412662506103516, + "eval_precision": 0.8905735232548874, + "eval_recall": 0.8906426155580609, + "eval_runtime": 29.236, + "eval_samples_per_second": 60.679, + "eval_steps_per_second": 0.958, + "step": 2250 + }, + { + "epoch": 36.16, + "learning_rate": 0.0006361290322580645, + "loss": 0.2062, + "step": 2260 + }, + { + "epoch": 36.32, + "learning_rate": 0.0006345161290322581, + "loss": 0.2251, + "step": 2270 + }, + { + "epoch": 36.48, + "learning_rate": 0.0006329032258064516, + "loss": 0.2086, + "step": 2280 + }, + { + "epoch": 36.64, + "learning_rate": 0.0006312903225806452, + "loss": 0.2239, + "step": 2290 + }, + { + "epoch": 36.8, + "learning_rate": 0.0006296774193548387, + "loss": 0.2053, + "step": 2300 + }, + { + "epoch": 36.96, + "learning_rate": 0.0006280645161290322, + "loss": 0.2151, + "step": 2310 + }, + { + "epoch": 36.99, + "step": 2312, + "train_accuracy": 0.9576361471454534, + "train_f1": 0.9574828276125282, + "train_loss": 0.12214481085538864, + "train_precision": 0.9575674620543635, + "train_recall": 0.9576361471454534, + "train_runtime": 250.154, + "train_samples_per_second": 63.789, + "train_steps_per_second": 0.999 + }, + { + "epoch": 36.99, + "eval_accuracy": 0.8928974069898534, + "eval_f1": 0.8924893933635943, + "eval_loss": 0.44935598969459534, + "eval_precision": 0.89342937260661, + "eval_recall": 0.8928974069898534, + "eval_runtime": 28.948, + "eval_samples_per_second": 61.282, + "eval_steps_per_second": 0.967, + "step": 2312 + }, + { + "epoch": 37.12, + "learning_rate": 0.0006264516129032259, + "loss": 0.2049, + "step": 2320 + }, + { + "epoch": 37.28, + "learning_rate": 0.0006248387096774194, + "loss": 0.2165, + "step": 2330 + }, + { + "epoch": 37.44, + "learning_rate": 0.0006232258064516129, + "loss": 0.2106, + "step": 2340 + }, + { + "epoch": 37.6, + "learning_rate": 0.0006216129032258064, + "loss": 0.211, + "step": 2350 + }, + { + "epoch": 37.76, + "learning_rate": 0.00062, + "loss": 0.1914, + "step": 2360 + }, + { + "epoch": 37.92, + "learning_rate": 0.0006183870967741936, + "loss": 0.2264, + "step": 2370 + }, + { + "epoch": 38.0, + "step": 2375, + "train_accuracy": 0.9594535313655449, + "train_f1": 0.9591420123249457, + "train_loss": 0.11748312413692474, + "train_precision": 0.9594846416905936, + "train_recall": 0.9594535313655449, + "train_runtime": 250.2463, + "train_samples_per_second": 63.765, + "train_steps_per_second": 0.999 + }, + { + "epoch": 38.0, + "eval_accuracy": 0.8889515219842165, + "eval_f1": 0.8874516370478664, + "eval_loss": 0.39475908875465393, + "eval_precision": 0.8871997983000491, + "eval_recall": 0.8889515219842165, + "eval_runtime": 29.3223, + "eval_samples_per_second": 60.5, + "eval_steps_per_second": 0.955, + "step": 2375 + }, + { + "epoch": 38.08, + "learning_rate": 0.0006167741935483871, + "loss": 0.2018, + "step": 2380 + }, + { + "epoch": 38.24, + "learning_rate": 0.0006151612903225806, + "loss": 0.2047, + "step": 2390 + }, + { + "epoch": 38.4, + "learning_rate": 0.0006135483870967742, + "loss": 0.1931, + "step": 2400 + }, + { + "epoch": 38.56, + "learning_rate": 0.0006119354838709678, + "loss": 0.1973, + "step": 2410 + }, + { + "epoch": 38.72, + "learning_rate": 0.0006103225806451613, + "loss": 0.1901, + "step": 2420 + }, + { + "epoch": 38.88, + "learning_rate": 0.0006087096774193549, + "loss": 0.2128, + "step": 2430 + }, + { + "epoch": 38.99, + "step": 2437, + "train_accuracy": 0.9598295418938397, + "train_f1": 0.959583924650842, + "train_loss": 0.12042330950498581, + "train_precision": 0.9599378088639902, + "train_recall": 0.9598295418938397, + "train_runtime": 250.3192, + "train_samples_per_second": 63.747, + "train_steps_per_second": 0.999 + }, + { + "epoch": 38.99, + "eval_accuracy": 0.8934611048478016, + "eval_f1": 0.8921610181685623, + "eval_loss": 0.4096975028514862, + "eval_precision": 0.8925841926833548, + "eval_recall": 0.8934611048478016, + "eval_runtime": 29.0714, + "eval_samples_per_second": 61.022, + "eval_steps_per_second": 0.963, + "step": 2437 + }, + { + "epoch": 39.04, + "learning_rate": 0.0006070967741935484, + "loss": 0.2022, + "step": 2440 + }, + { + "epoch": 39.2, + "learning_rate": 0.0006054838709677419, + "loss": 0.2015, + "step": 2450 + }, + { + "epoch": 39.36, + "learning_rate": 0.0006038709677419355, + "loss": 0.1906, + "step": 2460 + }, + { + "epoch": 39.52, + "learning_rate": 0.000602258064516129, + "loss": 0.2003, + "step": 2470 + }, + { + "epoch": 39.68, + "learning_rate": 0.0006006451612903226, + "loss": 0.2248, + "step": 2480 + }, + { + "epoch": 39.84, + "learning_rate": 0.0005990322580645161, + "loss": 0.1946, + "step": 2490 + }, + { + "epoch": 40.0, + "learning_rate": 0.0005974193548387097, + "loss": 0.2025, + "step": 2500 + }, + { + "epoch": 40.0, + "step": 2500, + "train_accuracy": 0.9597668734724573, + "train_f1": 0.9596223430089906, + "train_loss": 0.11334564536809921, + "train_precision": 0.9597271475233058, + "train_recall": 0.9597668734724573, + "train_runtime": 249.5517, + "train_samples_per_second": 63.943, + "train_steps_per_second": 1.002 + }, + { + "epoch": 40.0, + "eval_accuracy": 0.8906426155580609, + "eval_f1": 0.8896192336703185, + "eval_loss": 0.4116363823413849, + "eval_precision": 0.8906240785065708, + "eval_recall": 0.8906426155580609, + "eval_runtime": 29.0689, + "eval_samples_per_second": 61.027, + "eval_steps_per_second": 0.963, + "step": 2500 + }, + { + "epoch": 40.16, + "learning_rate": 0.0005958064516129033, + "loss": 0.1783, + "step": 2510 + }, + { + "epoch": 40.32, + "learning_rate": 0.0005941935483870968, + "loss": 0.1915, + "step": 2520 + }, + { + "epoch": 40.48, + "learning_rate": 0.0005925806451612903, + "loss": 0.2113, + "step": 2530 + }, + { + "epoch": 40.64, + "learning_rate": 0.0005909677419354838, + "loss": 0.1906, + "step": 2540 + }, + { + "epoch": 40.8, + "learning_rate": 0.0005893548387096774, + "loss": 0.1835, + "step": 2550 + }, + { + "epoch": 40.96, + "learning_rate": 0.0005877419354838711, + "loss": 0.2171, + "step": 2560 + }, + { + "epoch": 40.99, + "step": 2562, + "train_accuracy": 0.9614589208497838, + "train_f1": 0.9612864417727561, + "train_loss": 0.11530015617609024, + "train_precision": 0.9614218707043837, + "train_recall": 0.9614589208497838, + "train_runtime": 250.011, + "train_samples_per_second": 63.825, + "train_steps_per_second": 1.0 + }, + { + "epoch": 40.99, + "eval_accuracy": 0.8917700112739572, + "eval_f1": 0.890807210174242, + "eval_loss": 0.39598962664604187, + "eval_precision": 0.8908114942653367, + "eval_recall": 0.8917700112739572, + "eval_runtime": 29.1379, + "eval_samples_per_second": 60.883, + "eval_steps_per_second": 0.961, + "step": 2562 + }, + { + "epoch": 41.12, + "learning_rate": 0.0005861290322580646, + "loss": 0.1964, + "step": 2570 + }, + { + "epoch": 41.28, + "learning_rate": 0.0005845161290322581, + "loss": 0.1827, + "step": 2580 + }, + { + "epoch": 41.44, + "learning_rate": 0.0005829032258064516, + "loss": 0.2079, + "step": 2590 + }, + { + "epoch": 41.6, + "learning_rate": 0.0005812903225806452, + "loss": 0.2013, + "step": 2600 + }, + { + "epoch": 41.76, + "learning_rate": 0.0005796774193548387, + "loss": 0.2037, + "step": 2610 + }, + { + "epoch": 41.92, + "learning_rate": 0.0005780645161290323, + "loss": 0.2036, + "step": 2620 + }, + { + "epoch": 42.0, + "step": 2625, + "train_accuracy": 0.9622736103277558, + "train_f1": 0.9620354688265563, + "train_loss": 0.10922261327505112, + "train_precision": 0.9625027768138478, + "train_recall": 0.9622736103277558, + "train_runtime": 249.7106, + "train_samples_per_second": 63.902, + "train_steps_per_second": 1.001 + }, + { + "epoch": 42.0, + "eval_accuracy": 0.8928974069898534, + "eval_f1": 0.891600694267099, + "eval_loss": 0.3954656422138214, + "eval_precision": 0.8932440084981708, + "eval_recall": 0.8928974069898534, + "eval_runtime": 29.1604, + "eval_samples_per_second": 60.836, + "eval_steps_per_second": 0.96, + "step": 2625 + }, + { + "epoch": 42.08, + "learning_rate": 0.0005764516129032257, + "loss": 0.1971, + "step": 2630 + }, + { + "epoch": 42.24, + "learning_rate": 0.0005748387096774194, + "loss": 0.1976, + "step": 2640 + }, + { + "epoch": 42.4, + "learning_rate": 0.000573225806451613, + "loss": 0.2098, + "step": 2650 + }, + { + "epoch": 42.56, + "learning_rate": 0.0005716129032258065, + "loss": 0.1833, + "step": 2660 + }, + { + "epoch": 42.72, + "learning_rate": 0.00057, + "loss": 0.1936, + "step": 2670 + }, + { + "epoch": 42.88, + "learning_rate": 0.0005683870967741935, + "loss": 0.1849, + "step": 2680 + }, + { + "epoch": 42.99, + "step": 2687, + "train_accuracy": 0.9676004261452654, + "train_f1": 0.9675534222659815, + "train_loss": 0.10426949709653854, + "train_precision": 0.9676367935294685, + "train_recall": 0.9676004261452654, + "train_runtime": 251.7545, + "train_samples_per_second": 63.383, + "train_steps_per_second": 0.993 + }, + { + "epoch": 42.99, + "eval_accuracy": 0.8996617812852311, + "eval_f1": 0.8986036298251875, + "eval_loss": 0.3904741108417511, + "eval_precision": 0.8992691335353804, + "eval_recall": 0.8996617812852311, + "eval_runtime": 29.0995, + "eval_samples_per_second": 60.963, + "eval_steps_per_second": 0.962, + "step": 2687 + }, + { + "epoch": 43.04, + "learning_rate": 0.0005667741935483871, + "loss": 0.2041, + "step": 2690 + }, + { + "epoch": 43.2, + "learning_rate": 0.0005651612903225807, + "loss": 0.174, + "step": 2700 + }, + { + "epoch": 43.36, + "learning_rate": 0.0005635483870967742, + "loss": 0.1792, + "step": 2710 + }, + { + "epoch": 43.52, + "learning_rate": 0.0005619354838709677, + "loss": 0.1688, + "step": 2720 + }, + { + "epoch": 43.68, + "learning_rate": 0.0005603225806451613, + "loss": 0.1741, + "step": 2730 + }, + { + "epoch": 43.84, + "learning_rate": 0.0005587096774193549, + "loss": 0.2102, + "step": 2740 + }, + { + "epoch": 44.0, + "learning_rate": 0.0005570967741935484, + "loss": 0.1852, + "step": 2750 + }, + { + "epoch": 44.0, + "step": 2750, + "train_accuracy": 0.964843015604437, + "train_f1": 0.9647050151290405, + "train_loss": 0.10291223973035812, + "train_precision": 0.9649053412080801, + "train_recall": 0.964843015604437, + "train_runtime": 250.3464, + "train_samples_per_second": 63.74, + "train_steps_per_second": 0.999 + }, + { + "epoch": 44.0, + "eval_accuracy": 0.891206313416009, + "eval_f1": 0.8900612906872463, + "eval_loss": 0.42409268021583557, + "eval_precision": 0.891982003380522, + "eval_recall": 0.891206313416009, + "eval_runtime": 29.1706, + "eval_samples_per_second": 60.815, + "eval_steps_per_second": 0.96, + "step": 2750 + }, + { + "epoch": 44.16, + "learning_rate": 0.000555483870967742, + "loss": 0.1816, + "step": 2760 + }, + { + "epoch": 44.32, + "learning_rate": 0.0005538709677419355, + "loss": 0.1765, + "step": 2770 + }, + { + "epoch": 44.48, + "learning_rate": 0.000552258064516129, + "loss": 0.1748, + "step": 2780 + }, + { + "epoch": 44.64, + "learning_rate": 0.0005506451612903226, + "loss": 0.2168, + "step": 2790 + }, + { + "epoch": 44.8, + "learning_rate": 0.0005490322580645161, + "loss": 0.215, + "step": 2800 + }, + { + "epoch": 44.96, + "learning_rate": 0.0005474193548387097, + "loss": 0.1721, + "step": 2810 + }, + { + "epoch": 44.99, + "step": 2812, + "train_accuracy": 0.9665350629817635, + "train_f1": 0.9664032195703065, + "train_loss": 0.09303626418113708, + "train_precision": 0.9665823467276443, + "train_recall": 0.9665350629817635, + "train_runtime": 250.871, + "train_samples_per_second": 63.606, + "train_steps_per_second": 0.997 + }, + { + "epoch": 44.99, + "eval_accuracy": 0.8962795941375423, + "eval_f1": 0.8948599549258702, + "eval_loss": 0.4103808104991913, + "eval_precision": 0.8955637765539853, + "eval_recall": 0.8962795941375423, + "eval_runtime": 29.0827, + "eval_samples_per_second": 60.998, + "eval_steps_per_second": 0.963, + "step": 2812 + }, + { + "epoch": 45.12, + "learning_rate": 0.0005458064516129032, + "loss": 0.1553, + "step": 2820 + }, + { + "epoch": 45.28, + "learning_rate": 0.0005441935483870968, + "loss": 0.1801, + "step": 2830 + }, + { + "epoch": 45.44, + "learning_rate": 0.0005425806451612904, + "loss": 0.1815, + "step": 2840 + }, + { + "epoch": 45.6, + "learning_rate": 0.0005409677419354839, + "loss": 0.2039, + "step": 2850 + }, + { + "epoch": 45.76, + "learning_rate": 0.0005393548387096774, + "loss": 0.1867, + "step": 2860 + }, + { + "epoch": 45.92, + "learning_rate": 0.0005377419354838709, + "loss": 0.186, + "step": 2870 + }, + { + "epoch": 46.0, + "step": 2875, + "train_accuracy": 0.9643416682333772, + "train_f1": 0.9642012339068207, + "train_loss": 0.09887776523828506, + "train_precision": 0.9644991194334305, + "train_recall": 0.9643416682333772, + "train_runtime": 254.5578, + "train_samples_per_second": 62.685, + "train_steps_per_second": 0.982 + }, + { + "epoch": 46.0, + "eval_accuracy": 0.8917700112739572, + "eval_f1": 0.8901319153136237, + "eval_loss": 0.4248127341270447, + "eval_precision": 0.8912530636364911, + "eval_recall": 0.8917700112739572, + "eval_runtime": 29.5608, + "eval_samples_per_second": 60.012, + "eval_steps_per_second": 0.947, + "step": 2875 + }, + { + "epoch": 46.08, + "learning_rate": 0.0005361290322580645, + "loss": 0.1569, + "step": 2880 + }, + { + "epoch": 46.24, + "learning_rate": 0.0005345161290322581, + "loss": 0.1876, + "step": 2890 + }, + { + "epoch": 46.4, + "learning_rate": 0.0005329032258064517, + "loss": 0.1667, + "step": 2900 + }, + { + "epoch": 46.56, + "learning_rate": 0.0005312903225806452, + "loss": 0.169, + "step": 2910 + }, + { + "epoch": 46.72, + "learning_rate": 0.0005296774193548387, + "loss": 0.1803, + "step": 2920 + }, + { + "epoch": 46.88, + "learning_rate": 0.0005280645161290323, + "loss": 0.1811, + "step": 2930 + }, + { + "epoch": 46.99, + "step": 2937, + "train_accuracy": 0.9637149840195526, + "train_f1": 0.9634798755891425, + "train_loss": 0.10527843236923218, + "train_precision": 0.9638723537275108, + "train_recall": 0.9637149840195526, + "train_runtime": 254.7727, + "train_samples_per_second": 62.632, + "train_steps_per_second": 0.981 + }, + { + "epoch": 46.99, + "eval_accuracy": 0.9013528748590756, + "eval_f1": 0.8999103289546431, + "eval_loss": 0.42358672618865967, + "eval_precision": 0.9005857132412932, + "eval_recall": 0.9013528748590756, + "eval_runtime": 29.8042, + "eval_samples_per_second": 59.522, + "eval_steps_per_second": 0.939, + "step": 2937 + }, + { + "epoch": 47.04, + "learning_rate": 0.0005264516129032258, + "loss": 0.1698, + "step": 2940 + }, + { + "epoch": 47.2, + "learning_rate": 0.0005248387096774193, + "loss": 0.1744, + "step": 2950 + }, + { + "epoch": 47.36, + "learning_rate": 0.0005232258064516128, + "loss": 0.1718, + "step": 2960 + }, + { + "epoch": 47.52, + "learning_rate": 0.0005216129032258065, + "loss": 0.1777, + "step": 2970 + }, + { + "epoch": 47.68, + "learning_rate": 0.0005200000000000001, + "loss": 0.1872, + "step": 2980 + }, + { + "epoch": 47.84, + "learning_rate": 0.0005183870967741936, + "loss": 0.1586, + "step": 2990 + }, + { + "epoch": 48.0, + "learning_rate": 0.0005167741935483871, + "loss": 0.1891, + "step": 3000 + }, + { + "epoch": 48.0, + "step": 3000, + "train_accuracy": 0.9668484050886759, + "train_f1": 0.966687633803237, + "train_loss": 0.09668122977018356, + "train_precision": 0.9668284956038484, + "train_recall": 0.9668484050886759, + "train_runtime": 255.4386, + "train_samples_per_second": 62.469, + "train_steps_per_second": 0.979 + }, + { + "epoch": 48.0, + "eval_accuracy": 0.90304396843292, + "eval_f1": 0.9023176970973613, + "eval_loss": 0.40747305750846863, + "eval_precision": 0.902276892363872, + "eval_recall": 0.90304396843292, + "eval_runtime": 29.6296, + "eval_samples_per_second": 59.873, + "eval_steps_per_second": 0.945, + "step": 3000 + }, + { + "epoch": 48.16, + "learning_rate": 0.0005151612903225806, + "loss": 0.1566, + "step": 3010 + }, + { + "epoch": 48.32, + "learning_rate": 0.0005135483870967742, + "loss": 0.1511, + "step": 3020 + }, + { + "epoch": 48.48, + "learning_rate": 0.0005119354838709677, + "loss": 0.1914, + "step": 3030 + }, + { + "epoch": 48.64, + "learning_rate": 0.0005103225806451613, + "loss": 0.1828, + "step": 3040 + }, + { + "epoch": 48.8, + "learning_rate": 0.0005087096774193549, + "loss": 0.1749, + "step": 3050 + }, + { + "epoch": 48.96, + "learning_rate": 0.0005070967741935484, + "loss": 0.1791, + "step": 3060 + }, + { + "epoch": 48.99, + "step": 3062, + "train_accuracy": 0.9676630945666479, + "train_f1": 0.9675103889274802, + "train_loss": 0.09363168478012085, + "train_precision": 0.9676662039614543, + "train_recall": 0.9676630945666479, + "train_runtime": 255.9566, + "train_samples_per_second": 62.343, + "train_steps_per_second": 0.977 + }, + { + "epoch": 48.99, + "eval_accuracy": 0.8996617812852311, + "eval_f1": 0.8987006309929039, + "eval_loss": 0.4219958186149597, + "eval_precision": 0.8986787206496698, + "eval_recall": 0.8996617812852311, + "eval_runtime": 29.8773, + "eval_samples_per_second": 59.376, + "eval_steps_per_second": 0.937, + "step": 3062 + }, + { + "epoch": 49.12, + "learning_rate": 0.000505483870967742, + "loss": 0.1666, + "step": 3070 + }, + { + "epoch": 49.28, + "learning_rate": 0.0005038709677419355, + "loss": 0.1626, + "step": 3080 + }, + { + "epoch": 49.44, + "learning_rate": 0.000502258064516129, + "loss": 0.1771, + "step": 3090 + }, + { + "epoch": 49.6, + "learning_rate": 0.0005006451612903225, + "loss": 0.179, + "step": 3100 + }, + { + "epoch": 49.76, + "learning_rate": 0.0004990322580645161, + "loss": 0.1634, + "step": 3110 + }, + { + "epoch": 49.92, + "learning_rate": 0.0004974193548387097, + "loss": 0.1702, + "step": 3120 + }, + { + "epoch": 50.0, + "step": 3125, + "train_accuracy": 0.9695431472081218, + "train_f1": 0.9694274832881341, + "train_loss": 0.09215661138296127, + "train_precision": 0.9695025177310069, + "train_recall": 0.9695431472081218, + "train_runtime": 257.2202, + "train_samples_per_second": 62.036, + "train_steps_per_second": 0.972 + }, + { + "epoch": 50.0, + "eval_accuracy": 0.8928974069898534, + "eval_f1": 0.8922038394852897, + "eval_loss": 0.4357841908931732, + "eval_precision": 0.8922256622710033, + "eval_recall": 0.8928974069898534, + "eval_runtime": 30.2592, + "eval_samples_per_second": 58.627, + "eval_steps_per_second": 0.925, + "step": 3125 + }, + { + "epoch": 50.08, + "learning_rate": 0.0004958064516129032, + "loss": 0.1738, + "step": 3130 + }, + { + "epoch": 50.24, + "learning_rate": 0.0004941935483870968, + "loss": 0.1781, + "step": 3140 + }, + { + "epoch": 50.4, + "learning_rate": 0.0004925806451612903, + "loss": 0.1814, + "step": 3150 + }, + { + "epoch": 50.56, + "learning_rate": 0.0004909677419354839, + "loss": 0.1766, + "step": 3160 + }, + { + "epoch": 50.72, + "learning_rate": 0.0004893548387096775, + "loss": 0.1689, + "step": 3170 + }, + { + "epoch": 50.88, + "learning_rate": 0.00048774193548387095, + "loss": 0.1667, + "step": 3180 + }, + { + "epoch": 50.99, + "step": 3187, + "train_accuracy": 0.9679137682521777, + "train_f1": 0.9678451858925848, + "train_loss": 0.08850996196269989, + "train_precision": 0.9680124054185439, + "train_recall": 0.9679137682521777, + "train_runtime": 259.9309, + "train_samples_per_second": 61.389, + "train_steps_per_second": 0.962 + }, + { + "epoch": 50.99, + "eval_accuracy": 0.8957158962795941, + "eval_f1": 0.8947220269155864, + "eval_loss": 0.4486236274242401, + "eval_precision": 0.8959867186310805, + "eval_recall": 0.8957158962795941, + "eval_runtime": 30.0872, + "eval_samples_per_second": 58.962, + "eval_steps_per_second": 0.931, + "step": 3187 + }, + { + "epoch": 51.04, + "learning_rate": 0.00048612903225806457, + "loss": 0.1506, + "step": 3190 + }, + { + "epoch": 51.2, + "learning_rate": 0.00048451612903225807, + "loss": 0.1555, + "step": 3200 + }, + { + "epoch": 51.36, + "learning_rate": 0.00048290322580645163, + "loss": 0.164, + "step": 3210 + }, + { + "epoch": 51.52, + "learning_rate": 0.00048129032258064513, + "loss": 0.1574, + "step": 3220 + }, + { + "epoch": 51.68, + "learning_rate": 0.0004796774193548387, + "loss": 0.1554, + "step": 3230 + }, + { + "epoch": 51.84, + "learning_rate": 0.0004780645161290323, + "loss": 0.1653, + "step": 3240 + }, + { + "epoch": 52.0, + "learning_rate": 0.0004764516129032258, + "loss": 0.1733, + "step": 3250 + }, + { + "epoch": 52.0, + "step": 3250, + "train_accuracy": 0.9680391050949426, + "train_f1": 0.9679543842076324, + "train_loss": 0.09445594996213913, + "train_precision": 0.9681396256853804, + "train_recall": 0.9680391050949426, + "train_runtime": 256.4705, + "train_samples_per_second": 62.218, + "train_steps_per_second": 0.975 + }, + { + "epoch": 52.0, + "eval_accuracy": 0.8990980834272829, + "eval_f1": 0.8984372111101244, + "eval_loss": 0.43172532320022583, + "eval_precision": 0.8994814917612028, + "eval_recall": 0.8990980834272829, + "eval_runtime": 29.5992, + "eval_samples_per_second": 59.934, + "eval_steps_per_second": 0.946, + "step": 3250 + }, + { + "epoch": 52.16, + "learning_rate": 0.00047483870967741937, + "loss": 0.1639, + "step": 3260 + }, + { + "epoch": 52.32, + "learning_rate": 0.00047322580645161287, + "loss": 0.1541, + "step": 3270 + }, + { + "epoch": 52.48, + "learning_rate": 0.0004716129032258065, + "loss": 0.1554, + "step": 3280 + }, + { + "epoch": 52.64, + "learning_rate": 0.00047, + "loss": 0.175, + "step": 3290 + }, + { + "epoch": 52.8, + "learning_rate": 0.00046838709677419354, + "loss": 0.1697, + "step": 3300 + }, + { + "epoch": 52.96, + "learning_rate": 0.0004667741935483871, + "loss": 0.1704, + "step": 3310 + }, + { + "epoch": 52.99, + "step": 3312, + "train_accuracy": 0.9716112051137432, + "train_f1": 0.9715237218187844, + "train_loss": 0.08361362665891647, + "train_precision": 0.9715640137675557, + "train_recall": 0.9716112051137432, + "train_runtime": 251.4627, + "train_samples_per_second": 63.457, + "train_steps_per_second": 0.994 + }, + { + "epoch": 52.99, + "eval_accuracy": 0.9052987598647125, + "eval_f1": 0.9044568554057598, + "eval_loss": 0.41901707649230957, + "eval_precision": 0.9045753701091548, + "eval_recall": 0.9052987598647125, + "eval_runtime": 29.1923, + "eval_samples_per_second": 60.769, + "eval_steps_per_second": 0.959, + "step": 3312 + }, + { + "epoch": 53.12, + "learning_rate": 0.00046516129032258066, + "loss": 0.1537, + "step": 3320 + }, + { + "epoch": 53.28, + "learning_rate": 0.0004635483870967742, + "loss": 0.158, + "step": 3330 + }, + { + "epoch": 53.44, + "learning_rate": 0.0004619354838709677, + "loss": 0.1493, + "step": 3340 + }, + { + "epoch": 53.6, + "learning_rate": 0.0004603225806451613, + "loss": 0.1429, + "step": 3350 + }, + { + "epoch": 53.76, + "learning_rate": 0.00045870967741935484, + "loss": 0.1628, + "step": 3360 + }, + { + "epoch": 53.92, + "learning_rate": 0.0004570967741935484, + "loss": 0.1584, + "step": 3370 + }, + { + "epoch": 54.0, + "step": 3375, + "train_accuracy": 0.9703578366860939, + "train_f1": 0.970311857800694, + "train_loss": 0.08544992655515671, + "train_precision": 0.9704579277168014, + "train_recall": 0.9703578366860939, + "train_runtime": 251.0811, + "train_samples_per_second": 63.553, + "train_steps_per_second": 0.996 + }, + { + "epoch": 54.0, + "eval_accuracy": 0.9052987598647125, + "eval_f1": 0.904599065142192, + "eval_loss": 0.4305163323879242, + "eval_precision": 0.9055272931459877, + "eval_recall": 0.9052987598647125, + "eval_runtime": 29.1269, + "eval_samples_per_second": 60.906, + "eval_steps_per_second": 0.961, + "step": 3375 + }, + { + "epoch": 54.08, + "learning_rate": 0.00045548387096774196, + "loss": 0.1577, + "step": 3380 + }, + { + "epoch": 54.24, + "learning_rate": 0.00045387096774193546, + "loss": 0.168, + "step": 3390 + }, + { + "epoch": 54.4, + "learning_rate": 0.00045225806451612907, + "loss": 0.1541, + "step": 3400 + }, + { + "epoch": 54.56, + "learning_rate": 0.0004506451612903226, + "loss": 0.1443, + "step": 3410 + }, + { + "epoch": 54.72, + "learning_rate": 0.00044903225806451613, + "loss": 0.1363, + "step": 3420 + }, + { + "epoch": 54.88, + "learning_rate": 0.0004474193548387097, + "loss": 0.1763, + "step": 3430 + }, + { + "epoch": 54.99, + "step": 3437, + "train_accuracy": 0.9722378893275678, + "train_f1": 0.9722304845282628, + "train_loss": 0.08548293262720108, + "train_precision": 0.9724014188580902, + "train_recall": 0.9722378893275678, + "train_runtime": 250.8193, + "train_samples_per_second": 63.62, + "train_steps_per_second": 0.997 + }, + { + "epoch": 54.99, + "eval_accuracy": 0.8940248027057497, + "eval_f1": 0.8945776482571843, + "eval_loss": 0.4349919259548187, + "eval_precision": 0.896683319174708, + "eval_recall": 0.8940248027057497, + "eval_runtime": 29.0389, + "eval_samples_per_second": 61.09, + "eval_steps_per_second": 0.964, + "step": 3437 + }, + { + "epoch": 55.04, + "learning_rate": 0.00044580645161290325, + "loss": 0.163, + "step": 3440 + }, + { + "epoch": 55.2, + "learning_rate": 0.0004441935483870968, + "loss": 0.1468, + "step": 3450 + }, + { + "epoch": 55.36, + "learning_rate": 0.0004425806451612903, + "loss": 0.1356, + "step": 3460 + }, + { + "epoch": 55.52, + "learning_rate": 0.00044096774193548387, + "loss": 0.1485, + "step": 3470 + }, + { + "epoch": 55.68, + "learning_rate": 0.00043935483870967743, + "loss": 0.1444, + "step": 3480 + }, + { + "epoch": 55.84, + "learning_rate": 0.000437741935483871, + "loss": 0.1463, + "step": 3490 + }, + { + "epoch": 56.0, + "learning_rate": 0.00043612903225806454, + "loss": 0.1609, + "step": 3500 + }, + { + "epoch": 56.0, + "step": 3500, + "train_accuracy": 0.9711725261640659, + "train_f1": 0.9710418724528065, + "train_loss": 0.08499421924352646, + "train_precision": 0.9711871907063745, + "train_recall": 0.9711725261640659, + "train_runtime": 251.6401, + "train_samples_per_second": 63.412, + "train_steps_per_second": 0.993 + }, + { + "epoch": 56.0, + "eval_accuracy": 0.90304396843292, + "eval_f1": 0.901618221901619, + "eval_loss": 0.4472709894180298, + "eval_precision": 0.9023465256016576, + "eval_recall": 0.90304396843292, + "eval_runtime": 29.9118, + "eval_samples_per_second": 59.308, + "eval_steps_per_second": 0.936, + "step": 3500 + }, + { + "epoch": 56.16, + "learning_rate": 0.00043451612903225805, + "loss": 0.1393, + "step": 3510 + }, + { + "epoch": 56.32, + "learning_rate": 0.00043290322580645166, + "loss": 0.1481, + "step": 3520 + }, + { + "epoch": 56.48, + "learning_rate": 0.00043129032258064516, + "loss": 0.1449, + "step": 3530 + }, + { + "epoch": 56.64, + "learning_rate": 0.0004296774193548387, + "loss": 0.168, + "step": 3540 + }, + { + "epoch": 56.8, + "learning_rate": 0.0004280645161290322, + "loss": 0.1722, + "step": 3550 + }, + { + "epoch": 56.96, + "learning_rate": 0.00042645161290322584, + "loss": 0.1549, + "step": 3560 + }, + { + "epoch": 56.99, + "step": 3562, + "train_accuracy": 0.9697311524722693, + "train_f1": 0.96954160239289, + "train_loss": 0.0850997045636177, + "train_precision": 0.9698850921890346, + "train_recall": 0.9697311524722693, + "train_runtime": 250.5824, + "train_samples_per_second": 63.68, + "train_steps_per_second": 0.998 + }, + { + "epoch": 56.99, + "eval_accuracy": 0.9041713641488163, + "eval_f1": 0.9027359585690536, + "eval_loss": 0.4477776885032654, + "eval_precision": 0.9037863994127037, + "eval_recall": 0.9041713641488163, + "eval_runtime": 29.1274, + "eval_samples_per_second": 60.905, + "eval_steps_per_second": 0.961, + "step": 3562 + }, + { + "epoch": 57.12, + "learning_rate": 0.0004248387096774194, + "loss": 0.1415, + "step": 3570 + }, + { + "epoch": 57.28, + "learning_rate": 0.0004232258064516129, + "loss": 0.1557, + "step": 3580 + }, + { + "epoch": 57.44, + "learning_rate": 0.00042161290322580646, + "loss": 0.143, + "step": 3590 + }, + { + "epoch": 57.6, + "learning_rate": 0.00042, + "loss": 0.154, + "step": 3600 + }, + { + "epoch": 57.76, + "learning_rate": 0.0004183870967741936, + "loss": 0.1325, + "step": 3610 + }, + { + "epoch": 57.92, + "learning_rate": 0.0004167741935483871, + "loss": 0.1586, + "step": 3620 + }, + { + "epoch": 58.0, + "step": 3625, + "train_accuracy": 0.9711098577426834, + "train_f1": 0.9710190406318763, + "train_loss": 0.08111045509576797, + "train_precision": 0.971108783894692, + "train_recall": 0.9711098577426834, + "train_runtime": 252.5322, + "train_samples_per_second": 63.188, + "train_steps_per_second": 0.99 + }, + { + "epoch": 58.0, + "eval_accuracy": 0.9024802705749718, + "eval_f1": 0.9011546163080018, + "eval_loss": 0.4446905255317688, + "eval_precision": 0.9024443209930092, + "eval_recall": 0.9024802705749718, + "eval_runtime": 29.4234, + "eval_samples_per_second": 60.292, + "eval_steps_per_second": 0.952, + "step": 3625 + }, + { + "epoch": 58.08, + "learning_rate": 0.00041516129032258064, + "loss": 0.1589, + "step": 3630 + }, + { + "epoch": 58.24, + "learning_rate": 0.00041354838709677425, + "loss": 0.1368, + "step": 3640 + }, + { + "epoch": 58.4, + "learning_rate": 0.00041193548387096775, + "loss": 0.135, + "step": 3650 + }, + { + "epoch": 58.56, + "learning_rate": 0.0004103225806451613, + "loss": 0.1393, + "step": 3660 + }, + { + "epoch": 58.72, + "learning_rate": 0.0004087096774193548, + "loss": 0.1414, + "step": 3670 + }, + { + "epoch": 58.88, + "learning_rate": 0.00040709677419354843, + "loss": 0.1476, + "step": 3680 + }, + { + "epoch": 58.99, + "step": 3687, + "train_accuracy": 0.9716738735351257, + "train_f1": 0.9716109081205196, + "train_loss": 0.08109210431575775, + "train_precision": 0.971619589233349, + "train_recall": 0.9716738735351257, + "train_runtime": 251.3717, + "train_samples_per_second": 63.48, + "train_steps_per_second": 0.995 + }, + { + "epoch": 58.99, + "eval_accuracy": 0.9007891770011274, + "eval_f1": 0.8998310286529279, + "eval_loss": 0.46749255061149597, + "eval_precision": 0.9005611855026796, + "eval_recall": 0.9007891770011274, + "eval_runtime": 29.4648, + "eval_samples_per_second": 60.207, + "eval_steps_per_second": 0.95, + "step": 3687 + }, + { + "epoch": 59.04, + "learning_rate": 0.00040548387096774193, + "loss": 0.1469, + "step": 3690 + }, + { + "epoch": 59.2, + "learning_rate": 0.0004038709677419355, + "loss": 0.1285, + "step": 3700 + }, + { + "epoch": 59.36, + "learning_rate": 0.00040225806451612905, + "loss": 0.1543, + "step": 3710 + }, + { + "epoch": 59.52, + "learning_rate": 0.00040064516129032255, + "loss": 0.1576, + "step": 3720 + }, + { + "epoch": 59.68, + "learning_rate": 0.00039903225806451616, + "loss": 0.1639, + "step": 3730 + }, + { + "epoch": 59.84, + "learning_rate": 0.00039741935483870967, + "loss": 0.1602, + "step": 3740 + }, + { + "epoch": 60.0, + "learning_rate": 0.0003958064516129032, + "loss": 0.1308, + "step": 3750 + }, + { + "epoch": 60.0, + "step": 3750, + "train_accuracy": 0.9714231998495958, + "train_f1": 0.9713301650863866, + "train_loss": 0.08129285275936127, + "train_precision": 0.9713703959271734, + "train_recall": 0.9714231998495958, + "train_runtime": 251.5017, + "train_samples_per_second": 63.447, + "train_steps_per_second": 0.994 + }, + { + "epoch": 60.0, + "eval_accuracy": 0.9047350620067643, + "eval_f1": 0.9038436882672473, + "eval_loss": 0.4706019163131714, + "eval_precision": 0.9044402371933982, + "eval_recall": 0.9047350620067643, + "eval_runtime": 30.419, + "eval_samples_per_second": 58.319, + "eval_steps_per_second": 0.92, + "step": 3750 + }, + { + "epoch": 60.16, + "learning_rate": 0.00039419354838709673, + "loss": 0.1425, + "step": 3760 + }, + { + "epoch": 60.32, + "learning_rate": 0.00039258064516129034, + "loss": 0.1372, + "step": 3770 + }, + { + "epoch": 60.48, + "learning_rate": 0.0003909677419354839, + "loss": 0.1429, + "step": 3780 + }, + { + "epoch": 60.64, + "learning_rate": 0.0003893548387096774, + "loss": 0.1366, + "step": 3790 + }, + { + "epoch": 60.8, + "learning_rate": 0.00038774193548387096, + "loss": 0.1503, + "step": 3800 + }, + { + "epoch": 60.96, + "learning_rate": 0.0003861290322580645, + "loss": 0.1558, + "step": 3810 + }, + { + "epoch": 60.99, + "step": 3812, + "train_accuracy": 0.9736165945979821, + "train_f1": 0.9735015663251901, + "train_loss": 0.07483678311109543, + "train_precision": 0.9736053164344912, + "train_recall": 0.9736165945979821, + "train_runtime": 250.9181, + "train_samples_per_second": 63.594, + "train_steps_per_second": 0.996 + }, + { + "epoch": 60.99, + "eval_accuracy": 0.9024802705749718, + "eval_f1": 0.9010669627888577, + "eval_loss": 0.45878297090530396, + "eval_precision": 0.9016661622070999, + "eval_recall": 0.9024802705749718, + "eval_runtime": 30.1022, + "eval_samples_per_second": 58.933, + "eval_steps_per_second": 0.93, + "step": 3812 + }, + { + "epoch": 61.12, + "learning_rate": 0.0003845161290322581, + "loss": 0.1222, + "step": 3820 + }, + { + "epoch": 61.28, + "learning_rate": 0.0003829032258064516, + "loss": 0.1555, + "step": 3830 + }, + { + "epoch": 61.44, + "learning_rate": 0.00038129032258064514, + "loss": 0.1306, + "step": 3840 + }, + { + "epoch": 61.6, + "learning_rate": 0.00037967741935483875, + "loss": 0.1618, + "step": 3850 + }, + { + "epoch": 61.76, + "learning_rate": 0.00037806451612903226, + "loss": 0.1436, + "step": 3860 + }, + { + "epoch": 61.92, + "learning_rate": 0.0003764516129032258, + "loss": 0.1418, + "step": 3870 + }, + { + "epoch": 62.0, + "step": 3875, + "train_accuracy": 0.9729899103841574, + "train_f1": 0.9729148616100128, + "train_loss": 0.07707332819700241, + "train_precision": 0.9730221901785945, + "train_recall": 0.9729899103841574, + "train_runtime": 254.0917, + "train_samples_per_second": 62.8, + "train_steps_per_second": 0.984 + }, + { + "epoch": 62.0, + "eval_accuracy": 0.9013528748590756, + "eval_f1": 0.9008617353869077, + "eval_loss": 0.4513218104839325, + "eval_precision": 0.9012600116180024, + "eval_recall": 0.9013528748590756, + "eval_runtime": 29.9385, + "eval_samples_per_second": 59.255, + "eval_steps_per_second": 0.935, + "step": 3875 + }, + { + "epoch": 62.08, + "learning_rate": 0.0003748387096774193, + "loss": 0.173, + "step": 3880 + }, + { + "epoch": 62.24, + "learning_rate": 0.00037322580645161293, + "loss": 0.1304, + "step": 3890 + }, + { + "epoch": 62.4, + "learning_rate": 0.0003716129032258065, + "loss": 0.1579, + "step": 3900 + }, + { + "epoch": 62.56, + "learning_rate": 0.00037, + "loss": 0.1388, + "step": 3910 + }, + { + "epoch": 62.72, + "learning_rate": 0.00036838709677419355, + "loss": 0.152, + "step": 3920 + }, + { + "epoch": 62.88, + "learning_rate": 0.0003667741935483871, + "loss": 0.1283, + "step": 3930 + }, + { + "epoch": 62.99, + "step": 3937, + "train_accuracy": 0.9760606630318982, + "train_f1": 0.9759960239064865, + "train_loss": 0.07244587689638138, + "train_precision": 0.9760629730336429, + "train_recall": 0.9760606630318982, + "train_runtime": 252.7156, + "train_samples_per_second": 63.142, + "train_steps_per_second": 0.989 + }, + { + "epoch": 62.99, + "eval_accuracy": 0.8990980834272829, + "eval_f1": 0.8976577160373229, + "eval_loss": 0.46622931957244873, + "eval_precision": 0.8981920636717567, + "eval_recall": 0.8990980834272829, + "eval_runtime": 30.0055, + "eval_samples_per_second": 59.122, + "eval_steps_per_second": 0.933, + "step": 3937 + }, + { + "epoch": 63.04, + "learning_rate": 0.00036516129032258067, + "loss": 0.1423, + "step": 3940 + }, + { + "epoch": 63.2, + "learning_rate": 0.0003635483870967742, + "loss": 0.1345, + "step": 3950 + }, + { + "epoch": 63.36, + "learning_rate": 0.00036193548387096773, + "loss": 0.1225, + "step": 3960 + }, + { + "epoch": 63.52, + "learning_rate": 0.00036032258064516134, + "loss": 0.1399, + "step": 3970 + }, + { + "epoch": 63.68, + "learning_rate": 0.00035870967741935485, + "loss": 0.1409, + "step": 3980 + }, + { + "epoch": 63.84, + "learning_rate": 0.0003570967741935484, + "loss": 0.1431, + "step": 3990 + }, + { + "epoch": 64.0, + "learning_rate": 0.0003554838709677419, + "loss": 0.1375, + "step": 4000 + }, + { + "epoch": 64.0, + "step": 4000, + "train_accuracy": 0.9777527104092248, + "train_f1": 0.9776952189068061, + "train_loss": 0.07026933133602142, + "train_precision": 0.9777982624530956, + "train_recall": 0.9777527104092248, + "train_runtime": 254.2798, + "train_samples_per_second": 62.754, + "train_steps_per_second": 0.983 + }, + { + "epoch": 64.0, + "eval_accuracy": 0.90304396843292, + "eval_f1": 0.9018013838304384, + "eval_loss": 0.43738722801208496, + "eval_precision": 0.9018869567596777, + "eval_recall": 0.90304396843292, + "eval_runtime": 30.384, + "eval_samples_per_second": 58.386, + "eval_steps_per_second": 0.922, + "step": 4000 + }, + { + "epoch": 64.16, + "learning_rate": 0.0003538709677419355, + "loss": 0.121, + "step": 4010 + }, + { + "epoch": 64.32, + "learning_rate": 0.000352258064516129, + "loss": 0.1532, + "step": 4020 + }, + { + "epoch": 64.48, + "learning_rate": 0.0003506451612903226, + "loss": 0.1307, + "step": 4030 + }, + { + "epoch": 64.64, + "learning_rate": 0.00034903225806451614, + "loss": 0.1403, + "step": 4040 + }, + { + "epoch": 64.8, + "learning_rate": 0.0003474193548387097, + "loss": 0.1467, + "step": 4050 + }, + { + "epoch": 64.96, + "learning_rate": 0.00034580645161290326, + "loss": 0.1255, + "step": 4060 + }, + { + "epoch": 64.99, + "step": 4062, + "train_accuracy": 0.9758099893463683, + "train_f1": 0.9757450720707583, + "train_loss": 0.0738762691617012, + "train_precision": 0.9758253961539621, + "train_recall": 0.9758099893463683, + "train_runtime": 270.7799, + "train_samples_per_second": 58.93, + "train_steps_per_second": 0.923 + }, + { + "epoch": 64.99, + "eval_accuracy": 0.9041713641488163, + "eval_f1": 0.9027682806702938, + "eval_loss": 0.43671032786369324, + "eval_precision": 0.9029520225388282, + "eval_recall": 0.9041713641488163, + "eval_runtime": 46.2, + "eval_samples_per_second": 38.398, + "eval_steps_per_second": 0.606, + "step": 4062 + }, + { + "epoch": 65.12, + "learning_rate": 0.00034419354838709676, + "loss": 0.1398, + "step": 4070 + }, + { + "epoch": 65.28, + "learning_rate": 0.0003425806451612903, + "loss": 0.1294, + "step": 4080 + }, + { + "epoch": 65.44, + "learning_rate": 0.0003409677419354839, + "loss": 0.1192, + "step": 4090 + }, + { + "epoch": 65.6, + "learning_rate": 0.00033935483870967744, + "loss": 0.1274, + "step": 4100 + }, + { + "epoch": 65.76, + "learning_rate": 0.000337741935483871, + "loss": 0.1332, + "step": 4110 + }, + { + "epoch": 65.92, + "learning_rate": 0.0003361290322580645, + "loss": 0.1356, + "step": 4120 + }, + { + "epoch": 66.0, + "step": 4125, + "train_accuracy": 0.9761233314532807, + "train_f1": 0.9759950169974181, + "train_loss": 0.07223277539014816, + "train_precision": 0.9761999631640097, + "train_recall": 0.9761233314532807, + "train_runtime": 254.9857, + "train_samples_per_second": 62.58, + "train_steps_per_second": 0.98 + }, + { + "epoch": 66.0, + "eval_accuracy": 0.9002254791431793, + "eval_f1": 0.8985023992882097, + "eval_loss": 0.47199180722236633, + "eval_precision": 0.8998740225780659, + "eval_recall": 0.9002254791431793, + "eval_runtime": 46.4259, + "eval_samples_per_second": 38.211, + "eval_steps_per_second": 0.603, + "step": 4125 + }, + { + "epoch": 66.08, + "learning_rate": 0.0003345161290322581, + "loss": 0.1288, + "step": 4130 + }, + { + "epoch": 66.24, + "learning_rate": 0.0003329032258064516, + "loss": 0.1373, + "step": 4140 + }, + { + "epoch": 66.4, + "learning_rate": 0.0003312903225806452, + "loss": 0.1424, + "step": 4150 + }, + { + "epoch": 66.56, + "learning_rate": 0.0003296774193548387, + "loss": 0.1369, + "step": 4160 + }, + { + "epoch": 66.72, + "learning_rate": 0.0003280645161290323, + "loss": 0.1366, + "step": 4170 + }, + { + "epoch": 66.88, + "learning_rate": 0.00032645161290322585, + "loss": 0.1363, + "step": 4180 + }, + { + "epoch": 66.99, + "step": 4187, + "train_accuracy": 0.9745566209187191, + "train_f1": 0.9744197447101088, + "train_loss": 0.07206864655017853, + "train_precision": 0.9746863463745119, + "train_recall": 0.9745566209187191, + "train_runtime": 255.3323, + "train_samples_per_second": 62.495, + "train_steps_per_second": 0.979 + }, + { + "epoch": 66.99, + "eval_accuracy": 0.8974069898534386, + "eval_f1": 0.8958287812129277, + "eval_loss": 0.4912644624710083, + "eval_precision": 0.897064496045103, + "eval_recall": 0.8974069898534386, + "eval_runtime": 42.5384, + "eval_samples_per_second": 41.703, + "eval_steps_per_second": 0.658, + "step": 4187 + }, + { + "epoch": 67.04, + "learning_rate": 0.00032483870967741935, + "loss": 0.1427, + "step": 4190 + }, + { + "epoch": 67.2, + "learning_rate": 0.0003232258064516129, + "loss": 0.1423, + "step": 4200 + }, + { + "epoch": 67.36, + "learning_rate": 0.00032161290322580647, + "loss": 0.1346, + "step": 4210 + }, + { + "epoch": 67.52, + "learning_rate": 0.00032, + "loss": 0.1349, + "step": 4220 + }, + { + "epoch": 67.68, + "learning_rate": 0.00031838709677419353, + "loss": 0.1222, + "step": 4230 + }, + { + "epoch": 67.84, + "learning_rate": 0.0003167741935483871, + "loss": 0.1332, + "step": 4240 + }, + { + "epoch": 68.0, + "learning_rate": 0.0003151612903225807, + "loss": 0.1307, + "step": 4250 + }, + { + "epoch": 68.0, + "step": 4250, + "train_accuracy": 0.9771260261954001, + "train_f1": 0.9770837683590446, + "train_loss": 0.06861759722232819, + "train_precision": 0.9771261688994373, + "train_recall": 0.9771260261954001, + "train_runtime": 258.9242, + "train_samples_per_second": 61.628, + "train_steps_per_second": 0.966 + }, + { + "epoch": 68.0, + "eval_accuracy": 0.9041713641488163, + "eval_f1": 0.9032565731221586, + "eval_loss": 0.440946489572525, + "eval_precision": 0.9031641144165843, + "eval_recall": 0.9041713641488163, + "eval_runtime": 39.476, + "eval_samples_per_second": 44.939, + "eval_steps_per_second": 0.709, + "step": 4250 + }, + { + "epoch": 68.16, + "learning_rate": 0.0003135483870967742, + "loss": 0.1264, + "step": 4260 + }, + { + "epoch": 68.32, + "learning_rate": 0.00031193548387096776, + "loss": 0.1441, + "step": 4270 + }, + { + "epoch": 68.48, + "learning_rate": 0.00031032258064516127, + "loss": 0.127, + "step": 4280 + }, + { + "epoch": 68.64, + "learning_rate": 0.0003087096774193548, + "loss": 0.1392, + "step": 4290 + }, + { + "epoch": 68.8, + "learning_rate": 0.0003070967741935484, + "loss": 0.143, + "step": 4300 + }, + { + "epoch": 68.96, + "learning_rate": 0.00030548387096774194, + "loss": 0.1209, + "step": 4310 + }, + { + "epoch": 68.99, + "step": 4312, + "train_accuracy": 0.975621984082221, + "train_f1": 0.9755696733303983, + "train_loss": 0.07073085755109787, + "train_precision": 0.9756366046304416, + "train_recall": 0.975621984082221, + "train_runtime": 256.8947, + "train_samples_per_second": 62.115, + "train_steps_per_second": 0.973 + }, + { + "epoch": 68.99, + "eval_accuracy": 0.9075535512965051, + "eval_f1": 0.9068001649833212, + "eval_loss": 0.4593234956264496, + "eval_precision": 0.9074473642395492, + "eval_recall": 0.9075535512965051, + "eval_runtime": 44.7864, + "eval_samples_per_second": 39.61, + "eval_steps_per_second": 0.625, + "step": 4312 + }, + { + "epoch": 69.12, + "learning_rate": 0.0003038709677419355, + "loss": 0.1404, + "step": 4320 + }, + { + "epoch": 69.28, + "learning_rate": 0.000302258064516129, + "loss": 0.1417, + "step": 4330 + }, + { + "epoch": 69.44, + "learning_rate": 0.0003006451612903226, + "loss": 0.1137, + "step": 4340 + }, + { + "epoch": 69.6, + "learning_rate": 0.0002990322580645161, + "loss": 0.118, + "step": 4350 + }, + { + "epoch": 69.76, + "learning_rate": 0.0002974193548387097, + "loss": 0.1159, + "step": 4360 + }, + { + "epoch": 69.92, + "learning_rate": 0.0002958064516129032, + "loss": 0.1463, + "step": 4370 + }, + { + "epoch": 70.0, + "step": 4375, + "train_accuracy": 0.9780660525161371, + "train_f1": 0.9780045954132663, + "train_loss": 0.06600172072649002, + "train_precision": 0.9780539223141508, + "train_recall": 0.9780660525161371, + "train_runtime": 257.4363, + "train_samples_per_second": 61.984, + "train_steps_per_second": 0.971 + }, + { + "epoch": 70.0, + "eval_accuracy": 0.9036076662908681, + "eval_f1": 0.9023949767425765, + "eval_loss": 0.4772753119468689, + "eval_precision": 0.9028653821326578, + "eval_recall": 0.9036076662908681, + "eval_runtime": 40.3083, + "eval_samples_per_second": 44.011, + "eval_steps_per_second": 0.695, + "step": 4375 + }, + { + "epoch": 70.08, + "learning_rate": 0.0002941935483870968, + "loss": 0.1154, + "step": 4380 + }, + { + "epoch": 70.24, + "learning_rate": 0.00029258064516129035, + "loss": 0.1401, + "step": 4390 + }, + { + "epoch": 70.4, + "learning_rate": 0.00029096774193548386, + "loss": 0.1348, + "step": 4400 + }, + { + "epoch": 70.56, + "learning_rate": 0.0002893548387096774, + "loss": 0.1333, + "step": 4410 + }, + { + "epoch": 70.72, + "learning_rate": 0.00028774193548387097, + "loss": 0.1292, + "step": 4420 + }, + { + "epoch": 70.88, + "learning_rate": 0.00028612903225806453, + "loss": 0.1217, + "step": 4430 + }, + { + "epoch": 70.99, + "step": 4437, + "train_accuracy": 0.976562010402958, + "train_f1": 0.976489238828708, + "train_loss": 0.0662013366818428, + "train_precision": 0.9765934380829007, + "train_recall": 0.976562010402958, + "train_runtime": 257.6318, + "train_samples_per_second": 61.937, + "train_steps_per_second": 0.97 + }, + { + "epoch": 70.99, + "eval_accuracy": 0.9098083427282976, + "eval_f1": 0.9089720394690067, + "eval_loss": 0.44329407811164856, + "eval_precision": 0.9092946695808038, + "eval_recall": 0.9098083427282976, + "eval_runtime": 34.2569, + "eval_samples_per_second": 51.785, + "eval_steps_per_second": 0.817, + "step": 4437 + }, + { + "epoch": 71.04, + "learning_rate": 0.0002845161290322581, + "loss": 0.1271, + "step": 4440 + }, + { + "epoch": 71.2, + "learning_rate": 0.0002829032258064516, + "loss": 0.1108, + "step": 4450 + }, + { + "epoch": 71.36, + "learning_rate": 0.0002812903225806452, + "loss": 0.1156, + "step": 4460 + }, + { + "epoch": 71.52, + "learning_rate": 0.0002796774193548387, + "loss": 0.1385, + "step": 4470 + }, + { + "epoch": 71.68, + "learning_rate": 0.00027806451612903227, + "loss": 0.1333, + "step": 4480 + }, + { + "epoch": 71.84, + "learning_rate": 0.00027645161290322577, + "loss": 0.107, + "step": 4490 + }, + { + "epoch": 72.0, + "learning_rate": 0.0002748387096774194, + "loss": 0.1361, + "step": 4500 + }, + { + "epoch": 72.0, + "step": 4500, + "train_accuracy": 0.9758726577677508, + "train_f1": 0.9758377736683368, + "train_loss": 0.0710226520895958, + "train_precision": 0.9758628894264394, + "train_recall": 0.9758726577677508, + "train_runtime": 260.0539, + "train_samples_per_second": 61.36, + "train_steps_per_second": 0.961 + }, + { + "epoch": 72.0, + "eval_accuracy": 0.9075535512965051, + "eval_f1": 0.9071424091130842, + "eval_loss": 0.43427780270576477, + "eval_precision": 0.907402566021774, + "eval_recall": 0.9075535512965051, + "eval_runtime": 30.9428, + "eval_samples_per_second": 57.332, + "eval_steps_per_second": 0.905, + "step": 4500 + }, + { + "epoch": 72.16, + "learning_rate": 0.00027322580645161294, + "loss": 0.1166, + "step": 4510 + }, + { + "epoch": 72.32, + "learning_rate": 0.00027161290322580645, + "loss": 0.1422, + "step": 4520 + }, + { + "epoch": 72.48, + "learning_rate": 0.00027, + "loss": 0.1253, + "step": 4530 + }, + { + "epoch": 72.64, + "learning_rate": 0.00026838709677419356, + "loss": 0.1287, + "step": 4540 + }, + { + "epoch": 72.8, + "learning_rate": 0.0002667741935483871, + "loss": 0.1257, + "step": 4550 + }, + { + "epoch": 72.96, + "learning_rate": 0.0002651612903225806, + "loss": 0.1164, + "step": 4560 + }, + { + "epoch": 72.99, + "step": 4562, + "train_accuracy": 0.9780660525161371, + "train_f1": 0.9780243494069962, + "train_loss": 0.0632663145661354, + "train_precision": 0.978042686088855, + "train_recall": 0.9780660525161371, + "train_runtime": 253.9785, + "train_samples_per_second": 62.828, + "train_steps_per_second": 0.984 + }, + { + "epoch": 72.99, + "eval_accuracy": 0.9013528748590756, + "eval_f1": 0.90025705081473, + "eval_loss": 0.4536808431148529, + "eval_precision": 0.9003205156911865, + "eval_recall": 0.9013528748590756, + "eval_runtime": 30.6887, + "eval_samples_per_second": 57.806, + "eval_steps_per_second": 0.912, + "step": 4562 + }, + { + "epoch": 73.12, + "learning_rate": 0.0002635483870967742, + "loss": 0.1255, + "step": 4570 + }, + { + "epoch": 73.28, + "learning_rate": 0.0002619354838709678, + "loss": 0.1278, + "step": 4580 + }, + { + "epoch": 73.44, + "learning_rate": 0.0002603225806451613, + "loss": 0.1296, + "step": 4590 + }, + { + "epoch": 73.6, + "learning_rate": 0.00025870967741935486, + "loss": 0.1337, + "step": 4600 + }, + { + "epoch": 73.76, + "learning_rate": 0.00025709677419354836, + "loss": 0.1215, + "step": 4610 + }, + { + "epoch": 73.92, + "learning_rate": 0.00025548387096774197, + "loss": 0.1313, + "step": 4620 + }, + { + "epoch": 74.0, + "step": 4625, + "train_accuracy": 0.9766246788243405, + "train_f1": 0.9765319317829301, + "train_loss": 0.06763936579227448, + "train_precision": 0.976670123477298, + "train_recall": 0.9766246788243405, + "train_runtime": 252.1633, + "train_samples_per_second": 63.28, + "train_steps_per_second": 0.991 + }, + { + "epoch": 74.0, + "eval_accuracy": 0.90304396843292, + "eval_f1": 0.9020628517400072, + "eval_loss": 0.44354888796806335, + "eval_precision": 0.9022087129927616, + "eval_recall": 0.90304396843292, + "eval_runtime": 29.2384, + "eval_samples_per_second": 60.674, + "eval_steps_per_second": 0.958, + "step": 4625 + }, + { + "epoch": 74.08, + "learning_rate": 0.0002538709677419355, + "loss": 0.1295, + "step": 4630 + }, + { + "epoch": 74.24, + "learning_rate": 0.00025225806451612903, + "loss": 0.1257, + "step": 4640 + }, + { + "epoch": 74.4, + "learning_rate": 0.0002506451612903226, + "loss": 0.1009, + "step": 4650 + }, + { + "epoch": 74.56, + "learning_rate": 0.00024903225806451615, + "loss": 0.1254, + "step": 4660 + }, + { + "epoch": 74.72, + "learning_rate": 0.00024741935483870965, + "loss": 0.1291, + "step": 4670 + }, + { + "epoch": 74.88, + "learning_rate": 0.00024580645161290327, + "loss": 0.1291, + "step": 4680 + }, + { + "epoch": 74.99, + "step": 4687, + "train_accuracy": 0.97737669988093, + "train_f1": 0.9773174710084362, + "train_loss": 0.06480351090431213, + "train_precision": 0.9773832839286731, + "train_recall": 0.97737669988093, + "train_runtime": 266.3469, + "train_samples_per_second": 59.911, + "train_steps_per_second": 0.939 + }, + { + "epoch": 74.99, + "eval_accuracy": 0.9052987598647125, + "eval_f1": 0.9045329930179677, + "eval_loss": 0.44802290201187134, + "eval_precision": 0.9044815010640291, + "eval_recall": 0.9052987598647125, + "eval_runtime": 46.7594, + "eval_samples_per_second": 37.939, + "eval_steps_per_second": 0.599, + "step": 4687 + }, + { + "epoch": 75.04, + "learning_rate": 0.00024419354838709677, + "loss": 0.1316, + "step": 4690 + }, + { + "epoch": 75.2, + "learning_rate": 0.00024258064516129033, + "loss": 0.1315, + "step": 4700 + }, + { + "epoch": 75.36, + "learning_rate": 0.0002409677419354839, + "loss": 0.11, + "step": 4710 + }, + { + "epoch": 75.52, + "learning_rate": 0.00023935483870967742, + "loss": 0.1253, + "step": 4720 + }, + { + "epoch": 75.68, + "learning_rate": 0.00023774193548387098, + "loss": 0.1193, + "step": 4730 + }, + { + "epoch": 75.84, + "learning_rate": 0.0002361290322580645, + "loss": 0.1122, + "step": 4740 + }, + { + "epoch": 76.0, + "learning_rate": 0.0002345161290322581, + "loss": 0.132, + "step": 4750 + }, + { + "epoch": 76.0, + "step": 4750, + "train_accuracy": 0.9790060788368741, + "train_f1": 0.9789581823313721, + "train_loss": 0.060503240674734116, + "train_precision": 0.9790325316940118, + "train_recall": 0.9790060788368741, + "train_runtime": 258.2855, + "train_samples_per_second": 61.78, + "train_steps_per_second": 0.968 + }, + { + "epoch": 76.0, + "eval_accuracy": 0.9058624577226606, + "eval_f1": 0.904482480434317, + "eval_loss": 0.46171513199806213, + "eval_precision": 0.9050040535484158, + "eval_recall": 0.9058624577226606, + "eval_runtime": 42.9199, + "eval_samples_per_second": 41.333, + "eval_steps_per_second": 0.652, + "step": 4750 + }, + { + "epoch": 76.16, + "learning_rate": 0.00023290322580645162, + "loss": 0.1141, + "step": 4760 + }, + { + "epoch": 76.32, + "learning_rate": 0.00023129032258064516, + "loss": 0.111, + "step": 4770 + }, + { + "epoch": 76.48, + "learning_rate": 0.0002296774193548387, + "loss": 0.1362, + "step": 4780 + }, + { + "epoch": 76.64, + "learning_rate": 0.00022806451612903224, + "loss": 0.1303, + "step": 4790 + }, + { + "epoch": 76.8, + "learning_rate": 0.0002264516129032258, + "loss": 0.1128, + "step": 4800 + }, + { + "epoch": 76.96, + "learning_rate": 0.00022483870967741936, + "loss": 0.1197, + "step": 4810 + }, + { + "epoch": 76.99, + "step": 4812, + "train_accuracy": 0.9780033840947546, + "train_f1": 0.9779246149243382, + "train_loss": 0.06653548032045364, + "train_precision": 0.9780194906548296, + "train_recall": 0.9780033840947546, + "train_runtime": 255.9446, + "train_samples_per_second": 62.346, + "train_steps_per_second": 0.977 + }, + { + "epoch": 76.99, + "eval_accuracy": 0.9058624577226606, + "eval_f1": 0.904136777153852, + "eval_loss": 0.45922645926475525, + "eval_precision": 0.9042117586049208, + "eval_recall": 0.9058624577226606, + "eval_runtime": 37.615, + "eval_samples_per_second": 47.162, + "eval_steps_per_second": 0.744, + "step": 4812 + }, + { + "epoch": 77.12, + "learning_rate": 0.00022322580645161292, + "loss": 0.1185, + "step": 4820 + }, + { + "epoch": 77.28, + "learning_rate": 0.00022161290322580645, + "loss": 0.1297, + "step": 4830 + }, + { + "epoch": 77.44, + "learning_rate": 0.00022, + "loss": 0.1167, + "step": 4840 + }, + { + "epoch": 77.6, + "learning_rate": 0.00021838709677419354, + "loss": 0.1185, + "step": 4850 + }, + { + "epoch": 77.76, + "learning_rate": 0.0002167741935483871, + "loss": 0.1095, + "step": 4860 + }, + { + "epoch": 77.92, + "learning_rate": 0.00021516129032258063, + "loss": 0.1199, + "step": 4870 + }, + { + "epoch": 78.0, + "step": 4875, + "train_accuracy": 0.9789434104154916, + "train_f1": 0.9789055395945256, + "train_loss": 0.06331050395965576, + "train_precision": 0.9789060427186901, + "train_recall": 0.9789434104154916, + "train_runtime": 255.8602, + "train_samples_per_second": 62.366, + "train_steps_per_second": 0.977 + }, + { + "epoch": 78.0, + "eval_accuracy": 0.9160090191657272, + "eval_f1": 0.9156304372279399, + "eval_loss": 0.4329167902469635, + "eval_precision": 0.9156707582414666, + "eval_recall": 0.9160090191657272, + "eval_runtime": 36.868, + "eval_samples_per_second": 48.118, + "eval_steps_per_second": 0.759, + "step": 4875 + }, + { + "epoch": 78.08, + "learning_rate": 0.0002135483870967742, + "loss": 0.1098, + "step": 4880 + }, + { + "epoch": 78.24, + "learning_rate": 0.00021193548387096774, + "loss": 0.1134, + "step": 4890 + }, + { + "epoch": 78.4, + "learning_rate": 0.0002103225806451613, + "loss": 0.1103, + "step": 4900 + }, + { + "epoch": 78.56, + "learning_rate": 0.00020870967741935483, + "loss": 0.1266, + "step": 4910 + }, + { + "epoch": 78.72, + "learning_rate": 0.0002070967741935484, + "loss": 0.1083, + "step": 4920 + }, + { + "epoch": 78.88, + "learning_rate": 0.00020548387096774192, + "loss": 0.124, + "step": 4930 + }, + { + "epoch": 78.99, + "step": 4937, + "train_accuracy": 0.978191389358902, + "train_f1": 0.9781684943478942, + "train_loss": 0.06074240058660507, + "train_precision": 0.9782183084448501, + "train_recall": 0.978191389358902, + "train_runtime": 255.7407, + "train_samples_per_second": 62.395, + "train_steps_per_second": 0.978 + }, + { + "epoch": 78.99, + "eval_accuracy": 0.9148816234498309, + "eval_f1": 0.914220821168919, + "eval_loss": 0.4336497485637665, + "eval_precision": 0.9142236947687447, + "eval_recall": 0.9148816234498309, + "eval_runtime": 31.4364, + "eval_samples_per_second": 56.431, + "eval_steps_per_second": 0.891, + "step": 4937 + }, + { + "epoch": 79.04, + "learning_rate": 0.00020387096774193548, + "loss": 0.13, + "step": 4940 + }, + { + "epoch": 79.2, + "learning_rate": 0.00020225806451612904, + "loss": 0.1212, + "step": 4950 + }, + { + "epoch": 79.36, + "learning_rate": 0.0002006451612903226, + "loss": 0.0928, + "step": 4960 + }, + { + "epoch": 79.52, + "learning_rate": 0.00019903225806451613, + "loss": 0.1227, + "step": 4970 + }, + { + "epoch": 79.68, + "learning_rate": 0.00019741935483870969, + "loss": 0.1283, + "step": 4980 + }, + { + "epoch": 79.84, + "learning_rate": 0.00019580645161290322, + "loss": 0.1174, + "step": 4990 + }, + { + "epoch": 80.0, + "learning_rate": 0.00019419354838709678, + "loss": 0.118, + "step": 5000 + }, + { + "epoch": 80.0, + "step": 5000, + "train_accuracy": 0.9766246788243405, + "train_f1": 0.9765541131510495, + "train_loss": 0.0666293278336525, + "train_precision": 0.9766229850859756, + "train_recall": 0.9766246788243405, + "train_runtime": 259.1008, + "train_samples_per_second": 61.586, + "train_steps_per_second": 0.965 + }, + { + "epoch": 80.0, + "eval_accuracy": 0.9086809470124013, + "eval_f1": 0.90750875812957, + "eval_loss": 0.45560184121131897, + "eval_precision": 0.9079785564126395, + "eval_recall": 0.9086809470124013, + "eval_runtime": 31.9115, + "eval_samples_per_second": 55.591, + "eval_steps_per_second": 0.877, + "step": 5000 + }, + { + "epoch": 80.16, + "learning_rate": 0.0001925806451612903, + "loss": 0.0913, + "step": 5010 + }, + { + "epoch": 80.32, + "learning_rate": 0.0001909677419354839, + "loss": 0.124, + "step": 5020 + }, + { + "epoch": 80.48, + "learning_rate": 0.00018935483870967742, + "loss": 0.1009, + "step": 5030 + }, + { + "epoch": 80.64, + "learning_rate": 0.00018774193548387098, + "loss": 0.1101, + "step": 5040 + }, + { + "epoch": 80.8, + "learning_rate": 0.0001861290322580645, + "loss": 0.1066, + "step": 5050 + }, + { + "epoch": 80.96, + "learning_rate": 0.00018451612903225807, + "loss": 0.1135, + "step": 5060 + }, + { + "epoch": 80.99, + "step": 5062, + "train_accuracy": 0.9778153788306072, + "train_f1": 0.9777754434919425, + "train_loss": 0.0617908276617527, + "train_precision": 0.9778452684709104, + "train_recall": 0.9778153788306072, + "train_runtime": 256.2036, + "train_samples_per_second": 62.282, + "train_steps_per_second": 0.976 + }, + { + "epoch": 80.99, + "eval_accuracy": 0.9098083427282976, + "eval_f1": 0.9089912962963755, + "eval_loss": 0.4518835246562958, + "eval_precision": 0.9089474065575852, + "eval_recall": 0.9098083427282976, + "eval_runtime": 30.3175, + "eval_samples_per_second": 58.514, + "eval_steps_per_second": 0.924, + "step": 5062 + }, + { + "epoch": 81.12, + "learning_rate": 0.0001829032258064516, + "loss": 0.1126, + "step": 5070 + }, + { + "epoch": 81.28, + "learning_rate": 0.0001812903225806452, + "loss": 0.1193, + "step": 5080 + }, + { + "epoch": 81.44, + "learning_rate": 0.00017967741935483872, + "loss": 0.1, + "step": 5090 + }, + { + "epoch": 81.6, + "learning_rate": 0.00017806451612903228, + "loss": 0.1133, + "step": 5100 + }, + { + "epoch": 81.76, + "learning_rate": 0.0001764516129032258, + "loss": 0.1108, + "step": 5110 + }, + { + "epoch": 81.92, + "learning_rate": 0.00017483870967741936, + "loss": 0.1047, + "step": 5120 + }, + { + "epoch": 82.0, + "step": 5125, + "train_accuracy": 0.9804474525286708, + "train_f1": 0.9803932668866718, + "train_loss": 0.05864088237285614, + "train_precision": 0.9804524158920493, + "train_recall": 0.9804474525286708, + "train_runtime": 255.5222, + "train_samples_per_second": 62.449, + "train_steps_per_second": 0.978 + }, + { + "epoch": 82.0, + "eval_accuracy": 0.9086809470124013, + "eval_f1": 0.9077573719855858, + "eval_loss": 0.46281760931015015, + "eval_precision": 0.9078136987288633, + "eval_recall": 0.9086809470124013, + "eval_runtime": 29.6004, + "eval_samples_per_second": 59.932, + "eval_steps_per_second": 0.946, + "step": 5125 + }, + { + "epoch": 82.08, + "learning_rate": 0.0001732258064516129, + "loss": 0.1049, + "step": 5130 + }, + { + "epoch": 82.24, + "learning_rate": 0.00017161290322580645, + "loss": 0.1198, + "step": 5140 + }, + { + "epoch": 82.4, + "learning_rate": 0.00017, + "loss": 0.1013, + "step": 5150 + }, + { + "epoch": 82.56, + "learning_rate": 0.00016838709677419357, + "loss": 0.1196, + "step": 5160 + }, + { + "epoch": 82.72, + "learning_rate": 0.0001667741935483871, + "loss": 0.1076, + "step": 5170 + }, + { + "epoch": 82.88, + "learning_rate": 0.00016516129032258066, + "loss": 0.0977, + "step": 5180 + }, + { + "epoch": 82.99, + "step": 5187, + "train_accuracy": 0.9785047314658144, + "train_f1": 0.9784666340134569, + "train_loss": 0.061825916171073914, + "train_precision": 0.9784978840692157, + "train_recall": 0.9785047314658144, + "train_runtime": 254.0139, + "train_samples_per_second": 62.819, + "train_steps_per_second": 0.984 + }, + { + "epoch": 82.99, + "eval_accuracy": 0.9098083427282976, + "eval_f1": 0.9089103848523707, + "eval_loss": 0.4576462209224701, + "eval_precision": 0.9088881743285697, + "eval_recall": 0.9098083427282976, + "eval_runtime": 29.6463, + "eval_samples_per_second": 59.839, + "eval_steps_per_second": 0.944, + "step": 5187 + }, + { + "epoch": 83.04, + "learning_rate": 0.0001635483870967742, + "loss": 0.0983, + "step": 5190 + }, + { + "epoch": 83.2, + "learning_rate": 0.00016193548387096775, + "loss": 0.1126, + "step": 5200 + }, + { + "epoch": 83.36, + "learning_rate": 0.00016032258064516128, + "loss": 0.1362, + "step": 5210 + }, + { + "epoch": 83.52, + "learning_rate": 0.00015870967741935487, + "loss": 0.1073, + "step": 5220 + }, + { + "epoch": 83.68, + "learning_rate": 0.0001570967741935484, + "loss": 0.111, + "step": 5230 + }, + { + "epoch": 83.84, + "learning_rate": 0.00015548387096774195, + "loss": 0.1125, + "step": 5240 + }, + { + "epoch": 84.0, + "learning_rate": 0.00015387096774193549, + "loss": 0.1149, + "step": 5250 + }, + { + "epoch": 84.0, + "step": 5250, + "train_accuracy": 0.980886131478348, + "train_f1": 0.9808510090193131, + "train_loss": 0.05500573664903641, + "train_precision": 0.9808883134493344, + "train_recall": 0.980886131478348, + "train_runtime": 260.1325, + "train_samples_per_second": 61.342, + "train_steps_per_second": 0.961 + }, + { + "epoch": 84.0, + "eval_accuracy": 0.9120631341600902, + "eval_f1": 0.911138462348306, + "eval_loss": 0.4605374038219452, + "eval_precision": 0.9110930924781012, + "eval_recall": 0.9120631341600902, + "eval_runtime": 29.9251, + "eval_samples_per_second": 59.281, + "eval_steps_per_second": 0.936, + "step": 5250 + }, + { + "epoch": 84.16, + "learning_rate": 0.00015225806451612904, + "loss": 0.0955, + "step": 5260 + }, + { + "epoch": 84.32, + "learning_rate": 0.00015064516129032257, + "loss": 0.1187, + "step": 5270 + }, + { + "epoch": 84.48, + "learning_rate": 0.0001490322580645161, + "loss": 0.1101, + "step": 5280 + }, + { + "epoch": 84.64, + "learning_rate": 0.0001474193548387097, + "loss": 0.106, + "step": 5290 + }, + { + "epoch": 84.8, + "learning_rate": 0.00014580645161290322, + "loss": 0.099, + "step": 5300 + }, + { + "epoch": 84.96, + "learning_rate": 0.00014419354838709678, + "loss": 0.1241, + "step": 5310 + }, + { + "epoch": 84.99, + "step": 5312, + "train_accuracy": 0.9786927367299618, + "train_f1": 0.9786376256812076, + "train_loss": 0.0632563978433609, + "train_precision": 0.9786888233858622, + "train_recall": 0.9786927367299618, + "train_runtime": 253.1029, + "train_samples_per_second": 63.046, + "train_steps_per_second": 0.988 + }, + { + "epoch": 84.99, + "eval_accuracy": 0.9131905298759865, + "eval_f1": 0.9126492909049198, + "eval_loss": 0.4541684687137604, + "eval_precision": 0.9125019654104216, + "eval_recall": 0.9131905298759865, + "eval_runtime": 36.7528, + "eval_samples_per_second": 48.268, + "eval_steps_per_second": 0.762, + "step": 5312 + }, + { + "epoch": 85.12, + "learning_rate": 0.0001425806451612903, + "loss": 0.1174, + "step": 5320 + }, + { + "epoch": 85.28, + "learning_rate": 0.00014096774193548387, + "loss": 0.1129, + "step": 5330 + }, + { + "epoch": 85.44, + "learning_rate": 0.0001393548387096774, + "loss": 0.0965, + "step": 5340 + }, + { + "epoch": 85.6, + "learning_rate": 0.00013774193548387099, + "loss": 0.0996, + "step": 5350 + }, + { + "epoch": 85.76, + "learning_rate": 0.00013612903225806452, + "loss": 0.1188, + "step": 5360 + }, + { + "epoch": 85.92, + "learning_rate": 0.00013451612903225807, + "loss": 0.1262, + "step": 5370 + }, + { + "epoch": 86.0, + "step": 5375, + "train_accuracy": 0.9786927367299618, + "train_f1": 0.9786430332999637, + "train_loss": 0.06230182945728302, + "train_precision": 0.9787228334576183, + "train_recall": 0.9786927367299618, + "train_runtime": 254.0805, + "train_samples_per_second": 62.803, + "train_steps_per_second": 0.984 + }, + { + "epoch": 86.0, + "eval_accuracy": 0.9081172491544532, + "eval_f1": 0.9073973897547527, + "eval_loss": 0.45515382289886475, + "eval_precision": 0.9074033025441, + "eval_recall": 0.9081172491544532, + "eval_runtime": 29.4676, + "eval_samples_per_second": 60.202, + "eval_steps_per_second": 0.95, + "step": 5375 + }, + { + "epoch": 86.08, + "learning_rate": 0.0001329032258064516, + "loss": 0.1058, + "step": 5380 + }, + { + "epoch": 86.24, + "learning_rate": 0.00013129032258064516, + "loss": 0.1131, + "step": 5390 + }, + { + "epoch": 86.4, + "learning_rate": 0.0001296774193548387, + "loss": 0.1009, + "step": 5400 + }, + { + "epoch": 86.56, + "learning_rate": 0.00012806451612903225, + "loss": 0.1047, + "step": 5410 + }, + { + "epoch": 86.72, + "learning_rate": 0.0001264516129032258, + "loss": 0.1055, + "step": 5420 + }, + { + "epoch": 86.88, + "learning_rate": 0.00012483870967741934, + "loss": 0.1114, + "step": 5430 + }, + { + "epoch": 86.99, + "step": 5437, + "train_accuracy": 0.9789434104154916, + "train_f1": 0.9788727186488501, + "train_loss": 0.05873732641339302, + "train_precision": 0.9789898516890257, + "train_recall": 0.9789434104154916, + "train_runtime": 252.3787, + "train_samples_per_second": 63.226, + "train_steps_per_second": 0.991 + }, + { + "epoch": 86.99, + "eval_accuracy": 0.9081172491544532, + "eval_f1": 0.907050844291005, + "eval_loss": 0.46865400671958923, + "eval_precision": 0.9071698989671065, + "eval_recall": 0.9081172491544532, + "eval_runtime": 29.1201, + "eval_samples_per_second": 60.92, + "eval_steps_per_second": 0.962, + "step": 5437 + }, + { + "epoch": 87.04, + "learning_rate": 0.0001232258064516129, + "loss": 0.0953, + "step": 5440 + }, + { + "epoch": 87.2, + "learning_rate": 0.00012161290322580646, + "loss": 0.0895, + "step": 5450 + }, + { + "epoch": 87.36, + "learning_rate": 0.00012, + "loss": 0.1122, + "step": 5460 + }, + { + "epoch": 87.52, + "learning_rate": 0.00011838709677419356, + "loss": 0.1169, + "step": 5470 + }, + { + "epoch": 87.68, + "learning_rate": 0.0001167741935483871, + "loss": 0.1031, + "step": 5480 + }, + { + "epoch": 87.84, + "learning_rate": 0.00011516129032258065, + "loss": 0.1052, + "step": 5490 + }, + { + "epoch": 88.0, + "learning_rate": 0.00011354838709677421, + "loss": 0.0835, + "step": 5500 + }, + { + "epoch": 88.0, + "step": 5500, + "train_accuracy": 0.9811994735852604, + "train_f1": 0.9811537862655559, + "train_loss": 0.05588332563638687, + "train_precision": 0.9812213014473542, + "train_recall": 0.9811994735852604, + "train_runtime": 251.2614, + "train_samples_per_second": 63.508, + "train_steps_per_second": 0.995 + }, + { + "epoch": 88.0, + "eval_accuracy": 0.9098083427282976, + "eval_f1": 0.908811337868185, + "eval_loss": 0.4702069163322449, + "eval_precision": 0.9089224577124531, + "eval_recall": 0.9098083427282976, + "eval_runtime": 29.3425, + "eval_samples_per_second": 60.458, + "eval_steps_per_second": 0.954, + "step": 5500 + }, + { + "epoch": 88.16, + "learning_rate": 0.00011193548387096775, + "loss": 0.1158, + "step": 5510 + }, + { + "epoch": 88.32, + "learning_rate": 0.0001103225806451613, + "loss": 0.1029, + "step": 5520 + }, + { + "epoch": 88.48, + "learning_rate": 0.00010870967741935483, + "loss": 0.0978, + "step": 5530 + }, + { + "epoch": 88.64, + "learning_rate": 0.00010709677419354839, + "loss": 0.1028, + "step": 5540 + }, + { + "epoch": 88.8, + "learning_rate": 0.00010548387096774193, + "loss": 0.1053, + "step": 5550 + }, + { + "epoch": 88.96, + "learning_rate": 0.00010387096774193548, + "loss": 0.1174, + "step": 5560 + }, + { + "epoch": 88.99, + "step": 5562, + "train_accuracy": 0.9806981262142007, + "train_f1": 0.980651790063678, + "train_loss": 0.05491610988974571, + "train_precision": 0.9806996834447476, + "train_recall": 0.9806981262142007, + "train_runtime": 252.31, + "train_samples_per_second": 63.244, + "train_steps_per_second": 0.991 + }, + { + "epoch": 88.99, + "eval_accuracy": 0.9058624577226606, + "eval_f1": 0.9047448567468519, + "eval_loss": 0.4772195518016815, + "eval_precision": 0.9049010825589036, + "eval_recall": 0.9058624577226606, + "eval_runtime": 29.443, + "eval_samples_per_second": 60.252, + "eval_steps_per_second": 0.951, + "step": 5562 + }, + { + "epoch": 89.12, + "learning_rate": 0.00010225806451612903, + "loss": 0.1106, + "step": 5570 + }, + { + "epoch": 89.28, + "learning_rate": 0.00010064516129032258, + "loss": 0.0965, + "step": 5580 + }, + { + "epoch": 89.44, + "learning_rate": 9.903225806451612e-05, + "loss": 0.1145, + "step": 5590 + }, + { + "epoch": 89.6, + "learning_rate": 9.741935483870967e-05, + "loss": 0.1069, + "step": 5600 + }, + { + "epoch": 89.76, + "learning_rate": 9.580645161290323e-05, + "loss": 0.1008, + "step": 5610 + }, + { + "epoch": 89.92, + "learning_rate": 9.419354838709677e-05, + "loss": 0.103, + "step": 5620 + }, + { + "epoch": 90.0, + "step": 5625, + "train_accuracy": 0.9815754841135552, + "train_f1": 0.9815346716904167, + "train_loss": 0.05348266288638115, + "train_precision": 0.9815797384790222, + "train_recall": 0.9815754841135552, + "train_runtime": 252.062, + "train_samples_per_second": 63.306, + "train_steps_per_second": 0.992 + }, + { + "epoch": 90.0, + "eval_accuracy": 0.9075535512965051, + "eval_f1": 0.9065308517183118, + "eval_loss": 0.4701833724975586, + "eval_precision": 0.9065469838144254, + "eval_recall": 0.9075535512965051, + "eval_runtime": 29.5689, + "eval_samples_per_second": 59.995, + "eval_steps_per_second": 0.947, + "step": 5625 + }, + { + "epoch": 90.08, + "learning_rate": 9.258064516129032e-05, + "loss": 0.0977, + "step": 5630 + }, + { + "epoch": 90.24, + "learning_rate": 9.096774193548387e-05, + "loss": 0.0948, + "step": 5640 + }, + { + "epoch": 90.4, + "learning_rate": 8.935483870967742e-05, + "loss": 0.0803, + "step": 5650 + }, + { + "epoch": 90.56, + "learning_rate": 8.774193548387096e-05, + "loss": 0.1025, + "step": 5660 + }, + { + "epoch": 90.72, + "learning_rate": 8.612903225806452e-05, + "loss": 0.1056, + "step": 5670 + }, + { + "epoch": 90.88, + "learning_rate": 8.451612903225807e-05, + "loss": 0.1086, + "step": 5680 + }, + { + "epoch": 90.99, + "step": 5687, + "train_accuracy": 0.9794447577865514, + "train_f1": 0.9794042503142155, + "train_loss": 0.05922425910830498, + "train_precision": 0.979461044879249, + "train_recall": 0.9794447577865514, + "train_runtime": 253.2112, + "train_samples_per_second": 63.019, + "train_steps_per_second": 0.987 + }, + { + "epoch": 90.99, + "eval_accuracy": 0.9081172491544532, + "eval_f1": 0.9074336656449804, + "eval_loss": 0.4727869927883148, + "eval_precision": 0.9075490421138191, + "eval_recall": 0.9081172491544532, + "eval_runtime": 29.3104, + "eval_samples_per_second": 60.525, + "eval_steps_per_second": 0.955, + "step": 5687 + }, + { + "epoch": 91.04, + "learning_rate": 8.290322580645161e-05, + "loss": 0.1074, + "step": 5690 + }, + { + "epoch": 91.2, + "learning_rate": 8.129032258064515e-05, + "loss": 0.1017, + "step": 5700 + }, + { + "epoch": 91.36, + "learning_rate": 7.967741935483871e-05, + "loss": 0.0971, + "step": 5710 + }, + { + "epoch": 91.52, + "learning_rate": 7.806451612903226e-05, + "loss": 0.1191, + "step": 5720 + }, + { + "epoch": 91.68, + "learning_rate": 7.64516129032258e-05, + "loss": 0.0995, + "step": 5730 + }, + { + "epoch": 91.84, + "learning_rate": 7.483870967741936e-05, + "loss": 0.1142, + "step": 5740 + }, + { + "epoch": 92.0, + "learning_rate": 7.32258064516129e-05, + "loss": 0.1087, + "step": 5750 + }, + { + "epoch": 92.0, + "step": 5750, + "train_accuracy": 0.9796954314720813, + "train_f1": 0.9796272548376432, + "train_loss": 0.05848938599228859, + "train_precision": 0.9797640627474504, + "train_recall": 0.9796954314720813, + "train_runtime": 254.0646, + "train_samples_per_second": 62.807, + "train_steps_per_second": 0.984 + }, + { + "epoch": 92.0, + "eval_accuracy": 0.9075535512965051, + "eval_f1": 0.9065640932569152, + "eval_loss": 0.4691586196422577, + "eval_precision": 0.9068629961295476, + "eval_recall": 0.9075535512965051, + "eval_runtime": 29.6211, + "eval_samples_per_second": 59.89, + "eval_steps_per_second": 0.945, + "step": 5750 + }, + { + "epoch": 92.16, + "learning_rate": 7.161290322580645e-05, + "loss": 0.1036, + "step": 5760 + }, + { + "epoch": 92.32, + "learning_rate": 7.000000000000001e-05, + "loss": 0.0991, + "step": 5770 + }, + { + "epoch": 92.48, + "learning_rate": 6.838709677419355e-05, + "loss": 0.1042, + "step": 5780 + }, + { + "epoch": 92.64, + "learning_rate": 6.67741935483871e-05, + "loss": 0.0997, + "step": 5790 + }, + { + "epoch": 92.8, + "learning_rate": 6.516129032258064e-05, + "loss": 0.0921, + "step": 5800 + }, + { + "epoch": 92.96, + "learning_rate": 6.35483870967742e-05, + "loss": 0.1041, + "step": 5810 + }, + { + "epoch": 92.99, + "step": 5812, + "train_accuracy": 0.979131415679639, + "train_f1": 0.9790799084691278, + "train_loss": 0.0558781623840332, + "train_precision": 0.9791236279775183, + "train_recall": 0.979131415679639, + "train_runtime": 253.4519, + "train_samples_per_second": 62.959, + "train_steps_per_second": 0.986 + }, + { + "epoch": 92.99, + "eval_accuracy": 0.9086809470124013, + "eval_f1": 0.9078901238987158, + "eval_loss": 0.45839613676071167, + "eval_precision": 0.9079266988153948, + "eval_recall": 0.9086809470124013, + "eval_runtime": 29.4342, + "eval_samples_per_second": 60.27, + "eval_steps_per_second": 0.951, + "step": 5812 + }, + { + "epoch": 93.12, + "learning_rate": 6.193548387096774e-05, + "loss": 0.116, + "step": 5820 + }, + { + "epoch": 93.28, + "learning_rate": 6.0322580645161295e-05, + "loss": 0.103, + "step": 5830 + }, + { + "epoch": 93.44, + "learning_rate": 5.870967741935484e-05, + "loss": 0.1129, + "step": 5840 + }, + { + "epoch": 93.6, + "learning_rate": 5.709677419354839e-05, + "loss": 0.1034, + "step": 5850 + }, + { + "epoch": 93.76, + "learning_rate": 5.5483870967741936e-05, + "loss": 0.1134, + "step": 5860 + }, + { + "epoch": 93.92, + "learning_rate": 5.387096774193549e-05, + "loss": 0.1109, + "step": 5870 + }, + { + "epoch": 94.0, + "step": 5875, + "train_accuracy": 0.9796327630506988, + "train_f1": 0.9795750790681897, + "train_loss": 0.05882110819220543, + "train_precision": 0.9796458578935032, + "train_recall": 0.9796327630506988, + "train_runtime": 253.4567, + "train_samples_per_second": 62.957, + "train_steps_per_second": 0.986 + }, + { + "epoch": 94.0, + "eval_accuracy": 0.9092446448703495, + "eval_f1": 0.9084684905580462, + "eval_loss": 0.45630526542663574, + "eval_precision": 0.9085750924367912, + "eval_recall": 0.9092446448703495, + "eval_runtime": 29.3382, + "eval_samples_per_second": 60.467, + "eval_steps_per_second": 0.954, + "step": 5875 + }, + { + "epoch": 94.08, + "learning_rate": 5.225806451612904e-05, + "loss": 0.0848, + "step": 5880 + }, + { + "epoch": 94.24, + "learning_rate": 5.064516129032258e-05, + "loss": 0.1063, + "step": 5890 + }, + { + "epoch": 94.4, + "learning_rate": 4.9032258064516135e-05, + "loss": 0.0929, + "step": 5900 + }, + { + "epoch": 94.56, + "learning_rate": 4.741935483870968e-05, + "loss": 0.1039, + "step": 5910 + }, + { + "epoch": 94.72, + "learning_rate": 4.580645161290323e-05, + "loss": 0.099, + "step": 5920 + }, + { + "epoch": 94.88, + "learning_rate": 4.4193548387096775e-05, + "loss": 0.1026, + "step": 5930 + }, + { + "epoch": 94.99, + "step": 5937, + "train_accuracy": 0.9824528420129097, + "train_f1": 0.9824112060096897, + "train_loss": 0.05392773821949959, + "train_precision": 0.9824786275566851, + "train_recall": 0.9824528420129097, + "train_runtime": 252.5661, + "train_samples_per_second": 63.179, + "train_steps_per_second": 0.99 + }, + { + "epoch": 94.99, + "eval_accuracy": 0.9126268320180383, + "eval_f1": 0.9117417948122799, + "eval_loss": 0.4614848494529724, + "eval_precision": 0.911808467672524, + "eval_recall": 0.9126268320180383, + "eval_runtime": 29.3223, + "eval_samples_per_second": 60.5, + "eval_steps_per_second": 0.955, + "step": 5937 + }, + { + "epoch": 95.04, + "learning_rate": 4.258064516129032e-05, + "loss": 0.1026, + "step": 5940 + }, + { + "epoch": 95.2, + "learning_rate": 4.096774193548387e-05, + "loss": 0.0942, + "step": 5950 + }, + { + "epoch": 95.36, + "learning_rate": 3.9354838709677416e-05, + "loss": 0.0936, + "step": 5960 + }, + { + "epoch": 95.52, + "learning_rate": 3.774193548387097e-05, + "loss": 0.0833, + "step": 5970 + }, + { + "epoch": 95.68, + "learning_rate": 3.612903225806452e-05, + "loss": 0.096, + "step": 5980 + }, + { + "epoch": 95.84, + "learning_rate": 3.451612903225806e-05, + "loss": 0.106, + "step": 5990 + }, + { + "epoch": 96.0, + "learning_rate": 3.2903225806451614e-05, + "loss": 0.0895, + "step": 6000 + }, + { + "epoch": 96.0, + "step": 6000, + "train_accuracy": 0.9817634893777025, + "train_f1": 0.9817217910538368, + "train_loss": 0.053316567093133926, + "train_precision": 0.981748670799393, + "train_recall": 0.9817634893777025, + "train_runtime": 254.5693, + "train_samples_per_second": 62.682, + "train_steps_per_second": 0.982 + }, + { + "epoch": 96.0, + "eval_accuracy": 0.9092446448703495, + "eval_f1": 0.908628347766777, + "eval_loss": 0.4585205316543579, + "eval_precision": 0.9086756206676724, + "eval_recall": 0.9092446448703495, + "eval_runtime": 29.7078, + "eval_samples_per_second": 59.715, + "eval_steps_per_second": 0.943, + "step": 6000 + }, + { + "epoch": 96.16, + "learning_rate": 3.129032258064516e-05, + "loss": 0.1019, + "step": 6010 + }, + { + "epoch": 96.32, + "learning_rate": 2.967741935483871e-05, + "loss": 0.1046, + "step": 6020 + }, + { + "epoch": 96.48, + "learning_rate": 2.806451612903226e-05, + "loss": 0.125, + "step": 6030 + }, + { + "epoch": 96.64, + "learning_rate": 2.6451612903225806e-05, + "loss": 0.0977, + "step": 6040 + }, + { + "epoch": 96.8, + "learning_rate": 2.4838709677419354e-05, + "loss": 0.104, + "step": 6050 + }, + { + "epoch": 96.96, + "learning_rate": 2.3225806451612906e-05, + "loss": 0.1045, + "step": 6060 + }, + { + "epoch": 96.99, + "step": 6062, + "train_accuracy": 0.9817008209563202, + "train_f1": 0.9816508134314273, + "train_loss": 0.05279777571558952, + "train_precision": 0.9816910608821193, + "train_recall": 0.9817008209563202, + "train_runtime": 252.7534, + "train_samples_per_second": 63.133, + "train_steps_per_second": 0.989 + }, + { + "epoch": 96.99, + "eval_accuracy": 0.9098083427282976, + "eval_f1": 0.909112926020381, + "eval_loss": 0.45533671975135803, + "eval_precision": 0.9090721377215957, + "eval_recall": 0.9098083427282976, + "eval_runtime": 33.0675, + "eval_samples_per_second": 53.648, + "eval_steps_per_second": 0.847, + "step": 6062 + }, + { + "epoch": 97.12, + "learning_rate": 2.1612903225806454e-05, + "loss": 0.0862, + "step": 6070 + }, + { + "epoch": 97.28, + "learning_rate": 2e-05, + "loss": 0.1078, + "step": 6080 + }, + { + "epoch": 97.44, + "learning_rate": 1.838709677419355e-05, + "loss": 0.1078, + "step": 6090 + }, + { + "epoch": 97.6, + "learning_rate": 1.6774193548387098e-05, + "loss": 0.107, + "step": 6100 + }, + { + "epoch": 97.76, + "learning_rate": 1.5161290322580646e-05, + "loss": 0.1139, + "step": 6110 + }, + { + "epoch": 97.92, + "learning_rate": 1.3548387096774194e-05, + "loss": 0.1081, + "step": 6120 + }, + { + "epoch": 98.0, + "step": 6125, + "train_accuracy": 0.9812621420066429, + "train_f1": 0.9812114737389049, + "train_loss": 0.054921120405197144, + "train_precision": 0.9812508070970374, + "train_recall": 0.9812621420066429, + "train_runtime": 252.6106, + "train_samples_per_second": 63.168, + "train_steps_per_second": 0.99 + }, + { + "epoch": 98.0, + "eval_accuracy": 0.9103720405862458, + "eval_f1": 0.9096321309054232, + "eval_loss": 0.4532192647457123, + "eval_precision": 0.9096536769299824, + "eval_recall": 0.9103720405862458, + "eval_runtime": 29.6384, + "eval_samples_per_second": 59.855, + "eval_steps_per_second": 0.945, + "step": 6125 + }, + { + "epoch": 98.08, + "learning_rate": 1.1935483870967743e-05, + "loss": 0.1011, + "step": 6130 + }, + { + "epoch": 98.24, + "learning_rate": 1.0322580645161291e-05, + "loss": 0.1031, + "step": 6140 + }, + { + "epoch": 98.4, + "learning_rate": 8.709677419354838e-06, + "loss": 0.0821, + "step": 6150 + }, + { + "epoch": 98.56, + "learning_rate": 7.096774193548388e-06, + "loss": 0.0901, + "step": 6160 + }, + { + "epoch": 98.72, + "learning_rate": 5.483870967741935e-06, + "loss": 0.0809, + "step": 6170 + }, + { + "epoch": 98.88, + "learning_rate": 3.870967741935484e-06, + "loss": 0.0932, + "step": 6180 + }, + { + "epoch": 98.99, + "step": 6187, + "train_accuracy": 0.9805101209500533, + "train_f1": 0.9804742522228167, + "train_loss": 0.055186156183481216, + "train_precision": 0.980498073403408, + "train_recall": 0.9805101209500533, + "train_runtime": 254.3736, + "train_samples_per_second": 62.731, + "train_steps_per_second": 0.983 + }, + { + "epoch": 98.99, + "eval_accuracy": 0.9098083427282976, + "eval_f1": 0.9090900239365977, + "eval_loss": 0.4536750614643097, + "eval_precision": 0.9091114953941556, + "eval_recall": 0.9098083427282976, + "eval_runtime": 29.5537, + "eval_samples_per_second": 60.026, + "eval_steps_per_second": 0.947, + "step": 6187 + }, + { + "epoch": 99.04, + "learning_rate": 2.2580645161290324e-06, + "loss": 0.1136, + "step": 6190 + }, + { + "epoch": 99.2, + "learning_rate": 6.451612903225807e-07, + "loss": 0.0934, + "step": 6200 + }, + { + "epoch": 99.2, + "step": 6200, + "train_accuracy": 0.9809487998997305, + "train_f1": 0.9809066394306372, + "train_loss": 0.05620851740241051, + "train_precision": 0.9809434303564252, + "train_recall": 0.9809487998997305, + "train_runtime": 257.0414, + "train_samples_per_second": 62.08, + "train_steps_per_second": 0.973 + }, + { + "epoch": 99.2, + "eval_accuracy": 0.9098083427282976, + "eval_f1": 0.9090900239365977, + "eval_loss": 0.4539656639099121, + "eval_precision": 0.9091114953941556, + "eval_recall": 0.9098083427282976, + "eval_runtime": 29.6217, + "eval_samples_per_second": 59.889, + "eval_steps_per_second": 0.945, + "step": 6200 + }, + { + "epoch": 99.2, + "step": 6200, + "total_flos": 9.4522774317222e+19, + "train_loss": 0.2388173005080992, + "train_runtime": 58791.9316, + "train_samples_per_second": 27.141, + "train_steps_per_second": 0.105 + } + ], + "logging_steps": 10, + "max_steps": 6200, + "num_train_epochs": 100, + "save_steps": 500, + "total_flos": 9.4522774317222e+19, + "trial_name": null, + "trial_params": null +}