{ "best_metric": 0.11870068311691284, "best_model_checkpoint": "/home1/datawork/mcontini/models/multilabel/huggingface/DinoVdeau-large-2024_04_03-with_data_aug_batch-size32_epochs150_freeze/checkpoint-31707", "epoch": 127.0, "eval_steps": 500, "global_step": 34417, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.2206588881262869, "eval_f1_macro": 0.49067608571654225, "eval_f1_micro": 0.7368702869126769, "eval_loss": 0.1679287850856781, "eval_roc_auc": 0.8187531778602468, "eval_runtime": 714.8817, "eval_samples_per_second": 4.076, "eval_steps_per_second": 0.129, "learning_rate": 0.001, "step": 271 }, { "epoch": 1.85, "learning_rate": 0.001, "loss": 0.2713, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.2515442690459849, "eval_f1_macro": 0.538945165530625, "eval_f1_micro": 0.761357152262652, "eval_loss": 0.15397264063358307, "eval_roc_auc": 0.8356163050775759, "eval_runtime": 720.5749, "eval_samples_per_second": 4.044, "eval_steps_per_second": 0.128, "learning_rate": 0.001, "step": 542 }, { "epoch": 3.0, "eval_accuracy": 0.2525737817433082, "eval_f1_macro": 0.6053847883354035, "eval_f1_micro": 0.7728152001031681, "eval_loss": 0.14765480160713196, "eval_roc_auc": 0.8471568306864959, "eval_runtime": 730.8961, "eval_samples_per_second": 3.987, "eval_steps_per_second": 0.126, "learning_rate": 0.001, "step": 813 }, { "epoch": 3.69, "learning_rate": 0.001, "loss": 0.1679, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.2594371997254633, "eval_f1_macro": 0.5847686766758632, "eval_f1_micro": 0.7755244755244756, "eval_loss": 0.1577611267566681, "eval_roc_auc": 0.8442033715582856, "eval_runtime": 714.9053, "eval_samples_per_second": 4.076, "eval_steps_per_second": 0.129, "learning_rate": 0.001, "step": 1084 }, { "epoch": 5.0, "eval_accuracy": 0.2618393960192176, "eval_f1_macro": 0.6124919180480791, "eval_f1_micro": 0.7818521347933113, "eval_loss": 0.1426197737455368, "eval_roc_auc": 0.8555100487696016, "eval_runtime": 733.6285, "eval_samples_per_second": 3.972, "eval_steps_per_second": 0.125, "learning_rate": 0.001, "step": 1355 }, { "epoch": 5.54, "learning_rate": 0.001, "loss": 0.1598, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.25497597803706246, "eval_f1_macro": 0.6238861296316591, "eval_f1_micro": 0.7822487732024749, "eval_loss": 0.1422213762998581, "eval_roc_auc": 0.854243572334213, "eval_runtime": 724.219, "eval_samples_per_second": 4.024, "eval_steps_per_second": 0.127, "learning_rate": 0.001, "step": 1626 }, { "epoch": 7.0, "eval_accuracy": 0.255662319835278, "eval_f1_macro": 0.6319974141584176, "eval_f1_micro": 0.7825118828416049, "eval_loss": 0.1426122486591339, "eval_roc_auc": 0.8534283984871422, "eval_runtime": 746.9062, "eval_samples_per_second": 3.901, "eval_steps_per_second": 0.123, "learning_rate": 0.001, "step": 1897 }, { "epoch": 7.38, "learning_rate": 0.001, "loss": 0.1571, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.26286890871654084, "eval_f1_macro": 0.622325808061766, "eval_f1_micro": 0.7755977927651747, "eval_loss": 0.1528300642967224, "eval_roc_auc": 0.8437122124534273, "eval_runtime": 750.7491, "eval_samples_per_second": 3.881, "eval_steps_per_second": 0.123, "learning_rate": 0.001, "step": 2168 }, { "epoch": 9.0, "eval_accuracy": 0.24811256005490734, "eval_f1_macro": 0.6413065292545327, "eval_f1_micro": 0.7796035913942063, "eval_loss": 0.1438213288784027, "eval_roc_auc": 0.8548646774451706, "eval_runtime": 702.3842, "eval_samples_per_second": 4.149, "eval_steps_per_second": 0.131, "learning_rate": 0.001, "step": 2439 }, { "epoch": 9.23, "learning_rate": 0.001, "loss": 0.1554, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.26973232669869596, "eval_f1_macro": 0.6289481397755302, "eval_f1_micro": 0.7888898226741743, "eval_loss": 0.14050152897834778, "eval_roc_auc": 0.8621391370400399, "eval_runtime": 707.8611, "eval_samples_per_second": 4.117, "eval_steps_per_second": 0.13, "learning_rate": 0.001, "step": 2710 }, { "epoch": 11.0, "eval_accuracy": 0.26835964310226496, "eval_f1_macro": 0.6222474437475864, "eval_f1_micro": 0.789769130122821, "eval_loss": 0.14092855155467987, "eval_roc_auc": 0.8613792306841266, "eval_runtime": 711.2194, "eval_samples_per_second": 4.097, "eval_steps_per_second": 0.129, "learning_rate": 0.001, "step": 2981 }, { "epoch": 11.07, "learning_rate": 0.001, "loss": 0.1536, "step": 3000 }, { "epoch": 12.0, "eval_accuracy": 0.272477693891558, "eval_f1_macro": 0.6165764997376745, "eval_f1_micro": 0.7862856154611094, "eval_loss": 0.1391688883304596, "eval_roc_auc": 0.8528316876814077, "eval_runtime": 716.1193, "eval_samples_per_second": 4.069, "eval_steps_per_second": 0.128, "learning_rate": 0.001, "step": 3252 }, { "epoch": 12.92, "learning_rate": 0.001, "loss": 0.1526, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.2625257378174331, "eval_f1_macro": 0.6418972133310931, "eval_f1_micro": 0.7877202761222827, "eval_loss": 0.13992685079574585, "eval_roc_auc": 0.8558891516745222, "eval_runtime": 710.6314, "eval_samples_per_second": 4.101, "eval_steps_per_second": 0.129, "learning_rate": 0.001, "step": 3523 }, { "epoch": 14.0, "eval_accuracy": 0.26492793411118737, "eval_f1_macro": 0.632569342193695, "eval_f1_micro": 0.7859690345319514, "eval_loss": 0.1437946856021881, "eval_roc_auc": 0.8609422021280538, "eval_runtime": 694.5265, "eval_samples_per_second": 4.196, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 3794 }, { "epoch": 14.76, "learning_rate": 0.001, "loss": 0.1535, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.2735072065888813, "eval_f1_macro": 0.6499317144862637, "eval_f1_micro": 0.7930450968779085, "eval_loss": 0.13769365847110748, "eval_roc_auc": 0.8625195818341326, "eval_runtime": 690.6675, "eval_samples_per_second": 4.219, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 4065 }, { "epoch": 16.0, "eval_accuracy": 0.2676733013040494, "eval_f1_macro": 0.6435362275021823, "eval_f1_micro": 0.7868312757201646, "eval_loss": 0.13966824114322662, "eval_roc_auc": 0.8526053782518377, "eval_runtime": 692.8928, "eval_samples_per_second": 4.206, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 4336 }, { "epoch": 16.61, "learning_rate": 0.001, "loss": 0.1517, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.26458476321207963, "eval_f1_macro": 0.6400976985447264, "eval_f1_micro": 0.7928011464216472, "eval_loss": 0.1382310837507248, "eval_roc_auc": 0.8634350422987906, "eval_runtime": 684.8353, "eval_samples_per_second": 4.255, "eval_steps_per_second": 0.134, "learning_rate": 0.001, "step": 4607 }, { "epoch": 18.0, "eval_accuracy": 0.26835964310226496, "eval_f1_macro": 0.6285887470600311, "eval_f1_micro": 0.7912449392712552, "eval_loss": 0.1392030268907547, "eval_roc_auc": 0.8624142094617062, "eval_runtime": 684.0603, "eval_samples_per_second": 4.26, "eval_steps_per_second": 0.134, "learning_rate": 0.001, "step": 4878 }, { "epoch": 18.45, "learning_rate": 0.001, "loss": 0.1524, "step": 5000 }, { "epoch": 19.0, "eval_accuracy": 0.26355525051475637, "eval_f1_macro": 0.6182634857374021, "eval_f1_micro": 0.7874116344434035, "eval_loss": 0.1391826868057251, "eval_roc_auc": 0.8575979217492725, "eval_runtime": 683.6418, "eval_samples_per_second": 4.262, "eval_steps_per_second": 0.135, "learning_rate": 0.001, "step": 5149 }, { "epoch": 20.0, "eval_accuracy": 0.25978037062457104, "eval_f1_macro": 0.6286353323757679, "eval_f1_micro": 0.7878349022447502, "eval_loss": 0.13860712945461273, "eval_roc_auc": 0.857768912450985, "eval_runtime": 691.5194, "eval_samples_per_second": 4.214, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 5420 }, { "epoch": 20.3, "learning_rate": 0.001, "loss": 0.1527, "step": 5500 }, { "epoch": 21.0, "eval_accuracy": 0.2601235415236788, "eval_f1_macro": 0.6408380159549439, "eval_f1_micro": 0.7879620486841541, "eval_loss": 0.13738416135311127, "eval_roc_auc": 0.8556654235498968, "eval_runtime": 685.1425, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.134, "learning_rate": 0.001, "step": 5691 }, { "epoch": 22.0, "eval_accuracy": 0.2704186684969115, "eval_f1_macro": 0.6476322873247978, "eval_f1_micro": 0.7897032412554519, "eval_loss": 0.13765838742256165, "eval_roc_auc": 0.8577058498082936, "eval_runtime": 697.7818, "eval_samples_per_second": 4.176, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 5962 }, { "epoch": 22.14, "learning_rate": 0.001, "loss": 0.1513, "step": 6000 }, { "epoch": 23.0, "eval_accuracy": 0.26973232669869596, "eval_f1_macro": 0.6442899469566483, "eval_f1_micro": 0.7955080753701211, "eval_loss": 0.13733763992786407, "eval_roc_auc": 0.865520436446917, "eval_runtime": 694.652, "eval_samples_per_second": 4.195, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 6233 }, { "epoch": 23.99, "learning_rate": 0.001, "loss": 0.1514, "step": 6500 }, { "epoch": 24.0, "eval_accuracy": 0.2656142759094029, "eval_f1_macro": 0.6476856744170203, "eval_f1_micro": 0.7877039652128988, "eval_loss": 0.15933051705360413, "eval_roc_auc": 0.854747407398644, "eval_runtime": 693.7997, "eval_samples_per_second": 4.2, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 6504 }, { "epoch": 25.0, "eval_accuracy": 0.2656142759094029, "eval_f1_macro": 0.6476685424002145, "eval_f1_micro": 0.7909313518534156, "eval_loss": 0.1371144950389862, "eval_roc_auc": 0.8619333499582761, "eval_runtime": 697.1431, "eval_samples_per_second": 4.18, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 6775 }, { "epoch": 25.83, "learning_rate": 0.001, "loss": 0.1513, "step": 7000 }, { "epoch": 26.0, "eval_accuracy": 0.26664378860672616, "eval_f1_macro": 0.6272636530018297, "eval_f1_micro": 0.7871246489522575, "eval_loss": 0.13742324709892273, "eval_roc_auc": 0.8534693919460551, "eval_runtime": 694.4047, "eval_samples_per_second": 4.196, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 7046 }, { "epoch": 27.0, "eval_accuracy": 0.26458476321207963, "eval_f1_macro": 0.6470102889103709, "eval_f1_micro": 0.7933764066578238, "eval_loss": 0.13733525574207306, "eval_roc_auc": 0.8595147784088812, "eval_runtime": 705.8388, "eval_samples_per_second": 4.128, "eval_steps_per_second": 0.13, "learning_rate": 0.001, "step": 7317 }, { "epoch": 27.68, "learning_rate": 0.001, "loss": 0.1508, "step": 7500 }, { "epoch": 28.0, "eval_accuracy": 0.2735072065888813, "eval_f1_macro": 0.652320369204269, "eval_f1_micro": 0.7932826525791349, "eval_loss": 0.13527436554431915, "eval_roc_auc": 0.8584141813020159, "eval_runtime": 689.3753, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 7588 }, { "epoch": 29.0, "eval_accuracy": 0.27762525737817434, "eval_f1_macro": 0.652221916857101, "eval_f1_micro": 0.7959942533592496, "eval_loss": 0.1361834853887558, "eval_roc_auc": 0.864468679187198, "eval_runtime": 711.9932, "eval_samples_per_second": 4.093, "eval_steps_per_second": 0.129, "learning_rate": 0.001, "step": 7859 }, { "epoch": 29.52, "learning_rate": 0.001, "loss": 0.1506, "step": 8000 }, { "epoch": 30.0, "eval_accuracy": 0.25051475634866166, "eval_f1_macro": 0.6283472874420969, "eval_f1_micro": 0.784903397164032, "eval_loss": 0.13839198648929596, "eval_roc_auc": 0.8546034947804995, "eval_runtime": 711.3244, "eval_samples_per_second": 4.097, "eval_steps_per_second": 0.129, "learning_rate": 0.001, "step": 8130 }, { "epoch": 31.0, "eval_accuracy": 0.2717913520933425, "eval_f1_macro": 0.6629936811365008, "eval_f1_micro": 0.7963528413910094, "eval_loss": 0.13423041999340057, "eval_roc_auc": 0.8636329317556207, "eval_runtime": 717.6936, "eval_samples_per_second": 4.06, "eval_steps_per_second": 0.128, "learning_rate": 0.001, "step": 8401 }, { "epoch": 31.37, "learning_rate": 0.001, "loss": 0.151, "step": 8500 }, { "epoch": 32.0, "eval_accuracy": 0.2717913520933425, "eval_f1_macro": 0.655637689548565, "eval_f1_micro": 0.7967574308875494, "eval_loss": 0.13658006489276886, "eval_roc_auc": 0.8695589279972756, "eval_runtime": 709.164, "eval_samples_per_second": 4.109, "eval_steps_per_second": 0.13, "learning_rate": 0.001, "step": 8672 }, { "epoch": 33.0, "eval_accuracy": 0.2824296499656829, "eval_f1_macro": 0.6635088812977026, "eval_f1_micro": 0.7984512261126608, "eval_loss": 0.13591675460338593, "eval_roc_auc": 0.8700680281449957, "eval_runtime": 711.6984, "eval_samples_per_second": 4.094, "eval_steps_per_second": 0.129, "learning_rate": 0.001, "step": 8943 }, { "epoch": 33.21, "learning_rate": 0.001, "loss": 0.1507, "step": 9000 }, { "epoch": 34.0, "eval_accuracy": 0.28140013726835966, "eval_f1_macro": 0.640009703489063, "eval_f1_micro": 0.7998982533491605, "eval_loss": 0.13349105417728424, "eval_roc_auc": 0.8656642978283616, "eval_runtime": 702.7082, "eval_samples_per_second": 4.147, "eval_steps_per_second": 0.131, "learning_rate": 0.001, "step": 9214 }, { "epoch": 35.0, "eval_accuracy": 0.272477693891558, "eval_f1_macro": 0.6519975762807232, "eval_f1_micro": 0.7962736584748978, "eval_loss": 0.13431623578071594, "eval_roc_auc": 0.8653056916528975, "eval_runtime": 695.9796, "eval_samples_per_second": 4.187, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 9485 }, { "epoch": 35.06, "learning_rate": 0.001, "loss": 0.1495, "step": 9500 }, { "epoch": 36.0, "eval_accuracy": 0.26355525051475637, "eval_f1_macro": 0.6451443559854592, "eval_f1_micro": 0.7924289154590393, "eval_loss": 0.14291881024837494, "eval_roc_auc": 0.8625517456725394, "eval_runtime": 695.1357, "eval_samples_per_second": 4.192, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 9756 }, { "epoch": 36.9, "learning_rate": 0.001, "loss": 0.1496, "step": 10000 }, { "epoch": 37.0, "eval_accuracy": 0.2731640356897735, "eval_f1_macro": 0.653137873575441, "eval_f1_micro": 0.7981288539230278, "eval_loss": 0.13305164873600006, "eval_roc_auc": 0.8638193078136003, "eval_runtime": 697.8225, "eval_samples_per_second": 4.176, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 10027 }, { "epoch": 38.0, "eval_accuracy": 0.26835964310226496, "eval_f1_macro": 0.6306015382070221, "eval_f1_micro": 0.7938126806051335, "eval_loss": 0.13497667014598846, "eval_roc_auc": 0.8616735348011272, "eval_runtime": 691.2148, "eval_samples_per_second": 4.216, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 10298 }, { "epoch": 38.75, "learning_rate": 0.001, "loss": 0.1503, "step": 10500 }, { "epoch": 39.0, "eval_accuracy": 0.2800274536719286, "eval_f1_macro": 0.6464711728800093, "eval_f1_micro": 0.7983622472668946, "eval_loss": 0.1351996511220932, "eval_roc_auc": 0.866066786187883, "eval_runtime": 704.3219, "eval_samples_per_second": 4.137, "eval_steps_per_second": 0.131, "learning_rate": 0.001, "step": 10569 }, { "epoch": 40.0, "eval_accuracy": 0.27282086479066575, "eval_f1_macro": 0.6271011637782951, "eval_f1_micro": 0.792462504807076, "eval_loss": 0.13469766080379486, "eval_roc_auc": 0.8594016621216213, "eval_runtime": 698.796, "eval_samples_per_second": 4.17, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 10840 }, { "epoch": 40.59, "learning_rate": 0.001, "loss": 0.1505, "step": 11000 }, { "epoch": 41.0, "eval_accuracy": 0.2721345229924502, "eval_f1_macro": 0.6600650686410704, "eval_f1_micro": 0.7934581450398106, "eval_loss": 0.13396936655044556, "eval_roc_auc": 0.8579358774851804, "eval_runtime": 697.9239, "eval_samples_per_second": 4.175, "eval_steps_per_second": 0.132, "learning_rate": 0.001, "step": 11111 }, { "epoch": 42.0, "eval_accuracy": 0.27110501029512696, "eval_f1_macro": 0.6636104085895331, "eval_f1_micro": 0.7982560108364375, "eval_loss": 0.13215309381484985, "eval_roc_auc": 0.8652014818411935, "eval_runtime": 682.8814, "eval_samples_per_second": 4.267, "eval_steps_per_second": 0.135, "learning_rate": 0.001, "step": 11382 }, { "epoch": 42.44, "learning_rate": 0.001, "loss": 0.1491, "step": 11500 }, { "epoch": 43.0, "eval_accuracy": 0.2735072065888813, "eval_f1_macro": 0.6493239813433992, "eval_f1_micro": 0.7948717948717948, "eval_loss": 0.13603103160858154, "eval_roc_auc": 0.8634709756212143, "eval_runtime": 691.0843, "eval_samples_per_second": 4.217, "eval_steps_per_second": 0.133, "learning_rate": 0.001, "step": 11653 }, { "epoch": 44.0, "eval_accuracy": 0.28140013726835966, "eval_f1_macro": 0.6400431467798345, "eval_f1_micro": 0.7955270207066627, "eval_loss": 0.13608315587043762, "eval_roc_auc": 0.8624513565815523, "eval_runtime": 684.569, "eval_samples_per_second": 4.257, "eval_steps_per_second": 0.134, "learning_rate": 0.001, "step": 11924 }, { "epoch": 44.28, "learning_rate": 0.001, "loss": 0.1507, "step": 12000 }, { "epoch": 45.0, "eval_accuracy": 0.28140013726835966, "eval_f1_macro": 0.6424034446314527, "eval_f1_micro": 0.7970817780794026, "eval_loss": 0.13283775746822357, "eval_roc_auc": 0.863982631455529, "eval_runtime": 683.5579, "eval_samples_per_second": 4.263, "eval_steps_per_second": 0.135, "learning_rate": 0.001, "step": 12195 }, { "epoch": 46.0, "eval_accuracy": 0.2786547700754976, "eval_f1_macro": 0.6468829157126516, "eval_f1_micro": 0.7938707872422847, "eval_loss": 0.13275618851184845, "eval_roc_auc": 0.8581338694154104, "eval_runtime": 680.6581, "eval_samples_per_second": 4.281, "eval_steps_per_second": 0.135, "learning_rate": 0.001, "step": 12466 }, { "epoch": 46.13, "learning_rate": 0.001, "loss": 0.1495, "step": 12500 }, { "epoch": 47.0, "eval_accuracy": 0.27522306108442, "eval_f1_macro": 0.6351226382655659, "eval_f1_micro": 0.7977320453590928, "eval_loss": 0.133217915892601, "eval_roc_auc": 0.8671532629726034, "eval_runtime": 679.0852, "eval_samples_per_second": 4.291, "eval_steps_per_second": 0.135, "learning_rate": 0.001, "step": 12737 }, { "epoch": 47.97, "learning_rate": 0.001, "loss": 0.1498, "step": 13000 }, { "epoch": 48.0, "eval_accuracy": 0.2817433081674674, "eval_f1_macro": 0.6490013214958164, "eval_f1_micro": 0.8012753282711751, "eval_loss": 0.1325378566980362, "eval_roc_auc": 0.8694327770935429, "eval_runtime": 680.9219, "eval_samples_per_second": 4.279, "eval_steps_per_second": 0.135, "learning_rate": 0.001, "step": 13008 }, { "epoch": 49.0, "eval_accuracy": 0.2882635552505148, "eval_f1_macro": 0.6738484837965685, "eval_f1_micro": 0.8061649892618015, "eval_loss": 0.12826864421367645, "eval_roc_auc": 0.8710295165867058, "eval_runtime": 681.5467, "eval_samples_per_second": 4.276, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 13279 }, { "epoch": 49.82, "learning_rate": 0.0001, "loss": 0.1416, "step": 13500 }, { "epoch": 50.0, "eval_accuracy": 0.2872340425531915, "eval_f1_macro": 0.6734470768849717, "eval_f1_micro": 0.8086902026321288, "eval_loss": 0.12865300476551056, "eval_roc_auc": 0.8747272405310621, "eval_runtime": 680.1838, "eval_samples_per_second": 4.284, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 13550 }, { "epoch": 51.0, "eval_accuracy": 0.2899794097460535, "eval_f1_macro": 0.6713719689925478, "eval_f1_micro": 0.8067049484884311, "eval_loss": 0.12803621590137482, "eval_roc_auc": 0.870555059108937, "eval_runtime": 682.2484, "eval_samples_per_second": 4.271, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 13821 }, { "epoch": 51.66, "learning_rate": 0.0001, "loss": 0.1387, "step": 14000 }, { "epoch": 52.0, "eval_accuracy": 0.2899794097460535, "eval_f1_macro": 0.6744045610607882, "eval_f1_micro": 0.8067475584492453, "eval_loss": 0.12618477642536163, "eval_roc_auc": 0.8701989061466676, "eval_runtime": 687.4248, "eval_samples_per_second": 4.239, "eval_steps_per_second": 0.134, "learning_rate": 0.0001, "step": 14092 }, { "epoch": 53.0, "eval_accuracy": 0.2910089224433768, "eval_f1_macro": 0.6763777606257594, "eval_f1_micro": 0.8094476254631189, "eval_loss": 0.1262361854314804, "eval_roc_auc": 0.8728950289492735, "eval_runtime": 697.5202, "eval_samples_per_second": 4.178, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 14363 }, { "epoch": 53.51, "learning_rate": 0.0001, "loss": 0.1356, "step": 14500 }, { "epoch": 54.0, "eval_accuracy": 0.2947838023335621, "eval_f1_macro": 0.6743662487872923, "eval_f1_micro": 0.809105193867584, "eval_loss": 0.12573254108428955, "eval_roc_auc": 0.8701859740035948, "eval_runtime": 693.4876, "eval_samples_per_second": 4.202, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 14634 }, { "epoch": 55.0, "eval_accuracy": 0.2947838023335621, "eval_f1_macro": 0.681373956781595, "eval_f1_micro": 0.8106019238039233, "eval_loss": 0.12566907703876495, "eval_roc_auc": 0.8742077383085138, "eval_runtime": 700.1378, "eval_samples_per_second": 4.162, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 14905 }, { "epoch": 55.35, "learning_rate": 0.0001, "loss": 0.1348, "step": 15000 }, { "epoch": 56.0, "eval_accuracy": 0.3009608785175017, "eval_f1_macro": 0.6772158941721765, "eval_f1_micro": 0.8107721439091101, "eval_loss": 0.12600058317184448, "eval_roc_auc": 0.873783436948714, "eval_runtime": 679.772, "eval_samples_per_second": 4.287, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 15176 }, { "epoch": 57.0, "eval_accuracy": 0.29855868222374743, "eval_f1_macro": 0.6806264224832896, "eval_f1_micro": 0.8128972900635664, "eval_loss": 0.12499917298555374, "eval_roc_auc": 0.8767727269259479, "eval_runtime": 685.6404, "eval_samples_per_second": 4.25, "eval_steps_per_second": 0.134, "learning_rate": 0.0001, "step": 15447 }, { "epoch": 57.2, "learning_rate": 0.0001, "loss": 0.135, "step": 15500 }, { "epoch": 58.0, "eval_accuracy": 0.3081674673987646, "eval_f1_macro": 0.6858667117679004, "eval_f1_micro": 0.8141957160856784, "eval_loss": 0.12423347681760788, "eval_roc_auc": 0.8762249584407599, "eval_runtime": 700.338, "eval_samples_per_second": 4.161, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 15718 }, { "epoch": 59.0, "eval_accuracy": 0.3026767330130405, "eval_f1_macro": 0.6869701850967913, "eval_f1_micro": 0.8124398308986648, "eval_loss": 0.12452811747789383, "eval_roc_auc": 0.8762597292024611, "eval_runtime": 691.5065, "eval_samples_per_second": 4.214, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 15989 }, { "epoch": 59.04, "learning_rate": 0.0001, "loss": 0.1334, "step": 16000 }, { "epoch": 60.0, "eval_accuracy": 0.3030199039121482, "eval_f1_macro": 0.6853545892350839, "eval_f1_micro": 0.8137861803580391, "eval_loss": 0.1242317408323288, "eval_roc_auc": 0.8772149949469599, "eval_runtime": 700.2391, "eval_samples_per_second": 4.161, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 16260 }, { "epoch": 60.89, "learning_rate": 0.0001, "loss": 0.1335, "step": 16500 }, { "epoch": 61.0, "eval_accuracy": 0.3064516129032258, "eval_f1_macro": 0.6889451502143565, "eval_f1_micro": 0.8139671855279764, "eval_loss": 0.12397264689207077, "eval_roc_auc": 0.8756026651507299, "eval_runtime": 691.2006, "eval_samples_per_second": 4.216, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 16531 }, { "epoch": 62.0, "eval_accuracy": 0.3016472203157172, "eval_f1_macro": 0.6808873228168837, "eval_f1_micro": 0.8152336604024614, "eval_loss": 0.12486530840396881, "eval_roc_auc": 0.8798070104112615, "eval_runtime": 696.5149, "eval_samples_per_second": 4.184, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 16802 }, { "epoch": 62.73, "learning_rate": 0.0001, "loss": 0.1308, "step": 17000 }, { "epoch": 63.0, "eval_accuracy": 0.30679478380233355, "eval_f1_macro": 0.6848490082628171, "eval_f1_micro": 0.814602720114531, "eval_loss": 0.1233312338590622, "eval_roc_auc": 0.8756913615144166, "eval_runtime": 687.8446, "eval_samples_per_second": 4.236, "eval_steps_per_second": 0.134, "learning_rate": 0.0001, "step": 17073 }, { "epoch": 64.0, "eval_accuracy": 0.3057652711050103, "eval_f1_macro": 0.6908433037124228, "eval_f1_micro": 0.8151443922095367, "eval_loss": 0.12344498932361603, "eval_roc_auc": 0.8769218021539817, "eval_runtime": 702.1676, "eval_samples_per_second": 4.15, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 17344 }, { "epoch": 64.58, "learning_rate": 0.0001, "loss": 0.1326, "step": 17500 }, { "epoch": 65.0, "eval_accuracy": 0.303363074811256, "eval_f1_macro": 0.6812168886607934, "eval_f1_micro": 0.8124392614188533, "eval_loss": 0.12330327183008194, "eval_roc_auc": 0.8734876628507912, "eval_runtime": 680.7902, "eval_samples_per_second": 4.28, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 17615 }, { "epoch": 66.0, "eval_accuracy": 0.3026767330130405, "eval_f1_macro": 0.6878311502092693, "eval_f1_micro": 0.8144785071642787, "eval_loss": 0.12320297956466675, "eval_roc_auc": 0.8788401544128625, "eval_runtime": 690.4713, "eval_samples_per_second": 4.22, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 17886 }, { "epoch": 66.42, "learning_rate": 0.0001, "loss": 0.1306, "step": 18000 }, { "epoch": 67.0, "eval_accuracy": 0.3074811256005491, "eval_f1_macro": 0.6857441975499322, "eval_f1_micro": 0.8115152031343156, "eval_loss": 0.1227714866399765, "eval_roc_auc": 0.8706963391854371, "eval_runtime": 709.7385, "eval_samples_per_second": 4.106, "eval_steps_per_second": 0.13, "learning_rate": 0.0001, "step": 18157 }, { "epoch": 68.0, "eval_accuracy": 0.3074811256005491, "eval_f1_macro": 0.6913054019394733, "eval_f1_micro": 0.8153315962007229, "eval_loss": 0.12259615212678909, "eval_roc_auc": 0.8766517898688044, "eval_runtime": 686.725, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.134, "learning_rate": 0.0001, "step": 18428 }, { "epoch": 68.27, "learning_rate": 0.0001, "loss": 0.1299, "step": 18500 }, { "epoch": 69.0, "eval_accuracy": 0.30851063829787234, "eval_f1_macro": 0.676353464691654, "eval_f1_micro": 0.8143055965585593, "eval_loss": 0.12271784245967865, "eval_roc_auc": 0.8750562129363642, "eval_runtime": 693.9986, "eval_samples_per_second": 4.199, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 18699 }, { "epoch": 70.0, "eval_accuracy": 0.31056966369251887, "eval_f1_macro": 0.6999234521712909, "eval_f1_micro": 0.81868109179502, "eval_loss": 0.12295936793088913, "eval_roc_auc": 0.8837914226495347, "eval_runtime": 690.298, "eval_samples_per_second": 4.221, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 18970 }, { "epoch": 70.11, "learning_rate": 0.0001, "loss": 0.1295, "step": 19000 }, { "epoch": 71.0, "eval_accuracy": 0.30679478380233355, "eval_f1_macro": 0.6893382907323766, "eval_f1_micro": 0.8152834008097165, "eval_loss": 0.12247700244188309, "eval_roc_auc": 0.8756051952189744, "eval_runtime": 708.7396, "eval_samples_per_second": 4.112, "eval_steps_per_second": 0.13, "learning_rate": 0.0001, "step": 19241 }, { "epoch": 71.96, "learning_rate": 0.0001, "loss": 0.1289, "step": 19500 }, { "epoch": 72.0, "eval_accuracy": 0.30370624571036375, "eval_f1_macro": 0.6867583154614194, "eval_f1_micro": 0.815056734916049, "eval_loss": 0.12231950461864471, "eval_roc_auc": 0.877607106850003, "eval_runtime": 677.1006, "eval_samples_per_second": 4.304, "eval_steps_per_second": 0.136, "learning_rate": 0.0001, "step": 19512 }, { "epoch": 73.0, "eval_accuracy": 0.30542210020590255, "eval_f1_macro": 0.6917904729736315, "eval_f1_micro": 0.8165095327886026, "eval_loss": 0.12229206413030624, "eval_roc_auc": 0.8781856460477259, "eval_runtime": 692.4475, "eval_samples_per_second": 4.208, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 19783 }, { "epoch": 73.8, "learning_rate": 0.0001, "loss": 0.1279, "step": 20000 }, { "epoch": 74.0, "eval_accuracy": 0.30542210020590255, "eval_f1_macro": 0.6855916835036744, "eval_f1_micro": 0.8142863173892853, "eval_loss": 0.12248736619949341, "eval_roc_auc": 0.8747476347871186, "eval_runtime": 687.0839, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.134, "learning_rate": 0.0001, "step": 20054 }, { "epoch": 75.0, "eval_accuracy": 0.31022649279341113, "eval_f1_macro": 0.6878236573831232, "eval_f1_micro": 0.8167385749591589, "eval_loss": 0.12206920981407166, "eval_roc_auc": 0.8784119298589457, "eval_runtime": 681.8157, "eval_samples_per_second": 4.274, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 20325 }, { "epoch": 75.65, "learning_rate": 0.0001, "loss": 0.1276, "step": 20500 }, { "epoch": 76.0, "eval_accuracy": 0.31674673987645846, "eval_f1_macro": 0.6963741846422794, "eval_f1_micro": 0.8190190440471725, "eval_loss": 0.12172180414199829, "eval_roc_auc": 0.8812250665245784, "eval_runtime": 672.3327, "eval_samples_per_second": 4.334, "eval_steps_per_second": 0.137, "learning_rate": 0.0001, "step": 20596 }, { "epoch": 77.0, "eval_accuracy": 0.31022649279341113, "eval_f1_macro": 0.6940319651093022, "eval_f1_micro": 0.8179309191268713, "eval_loss": 0.12170004099607468, "eval_roc_auc": 0.8795832054315861, "eval_runtime": 683.2195, "eval_samples_per_second": 4.265, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 20867 }, { "epoch": 77.49, "learning_rate": 0.0001, "loss": 0.1274, "step": 21000 }, { "epoch": 78.0, "eval_accuracy": 0.3081674673987646, "eval_f1_macro": 0.6858950802374723, "eval_f1_micro": 0.8143468859965235, "eval_loss": 0.12156965583562851, "eval_roc_auc": 0.8735390327661822, "eval_runtime": 682.8416, "eval_samples_per_second": 4.267, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 21138 }, { "epoch": 79.0, "eval_accuracy": 0.31468771448181193, "eval_f1_macro": 0.6944979922976369, "eval_f1_micro": 0.8164588948787063, "eval_loss": 0.1215372309088707, "eval_roc_auc": 0.8766491829455099, "eval_runtime": 683.544, "eval_samples_per_second": 4.263, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 21409 }, { "epoch": 79.34, "learning_rate": 0.0001, "loss": 0.1269, "step": 21500 }, { "epoch": 80.0, "eval_accuracy": 0.31468771448181193, "eval_f1_macro": 0.6999268076002686, "eval_f1_micro": 0.8192881937183724, "eval_loss": 0.12135831266641617, "eval_roc_auc": 0.8802967507718976, "eval_runtime": 676.6048, "eval_samples_per_second": 4.307, "eval_steps_per_second": 0.136, "learning_rate": 0.0001, "step": 21680 }, { "epoch": 81.0, "eval_accuracy": 0.3112560054907344, "eval_f1_macro": 0.697411996468389, "eval_f1_micro": 0.81943004106691, "eval_loss": 0.12142007052898407, "eval_roc_auc": 0.8828322337109193, "eval_runtime": 681.4379, "eval_samples_per_second": 4.276, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 21951 }, { "epoch": 81.18, "learning_rate": 0.0001, "loss": 0.1259, "step": 22000 }, { "epoch": 82.0, "eval_accuracy": 0.31022649279341113, "eval_f1_macro": 0.695644238424867, "eval_f1_micro": 0.817148370317547, "eval_loss": 0.12121300399303436, "eval_roc_auc": 0.8782139123103657, "eval_runtime": 687.8898, "eval_samples_per_second": 4.236, "eval_steps_per_second": 0.134, "learning_rate": 0.0001, "step": 22222 }, { "epoch": 83.0, "eval_accuracy": 0.31228551818805766, "eval_f1_macro": 0.6969593372601354, "eval_f1_micro": 0.8189676877885018, "eval_loss": 0.12076255679130554, "eval_roc_auc": 0.8791048759964009, "eval_runtime": 683.6587, "eval_samples_per_second": 4.262, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 22493 }, { "epoch": 83.03, "learning_rate": 0.0001, "loss": 0.1258, "step": 22500 }, { "epoch": 84.0, "eval_accuracy": 0.31537405628002746, "eval_f1_macro": 0.699678101503351, "eval_f1_micro": 0.8203842940685045, "eval_loss": 0.12093241512775421, "eval_roc_auc": 0.8812674531093634, "eval_runtime": 674.7538, "eval_samples_per_second": 4.319, "eval_steps_per_second": 0.136, "learning_rate": 0.0001, "step": 22764 }, { "epoch": 84.87, "learning_rate": 0.0001, "loss": 0.1251, "step": 23000 }, { "epoch": 85.0, "eval_accuracy": 0.3064516129032258, "eval_f1_macro": 0.6934546509096284, "eval_f1_micro": 0.8163317114448911, "eval_loss": 0.12106911092996597, "eval_roc_auc": 0.875220724757365, "eval_runtime": 682.5674, "eval_samples_per_second": 4.269, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 23035 }, { "epoch": 86.0, "eval_accuracy": 0.31537405628002746, "eval_f1_macro": 0.6971612900401489, "eval_f1_micro": 0.8200962947456564, "eval_loss": 0.12031004577875137, "eval_roc_auc": 0.8804175853556411, "eval_runtime": 685.5285, "eval_samples_per_second": 4.251, "eval_steps_per_second": 0.134, "learning_rate": 0.0001, "step": 23306 }, { "epoch": 86.72, "learning_rate": 0.0001, "loss": 0.1251, "step": 23500 }, { "epoch": 87.0, "eval_accuracy": 0.3150308853809197, "eval_f1_macro": 0.6946815591935039, "eval_f1_micro": 0.8182238085240395, "eval_loss": 0.12081247568130493, "eval_roc_auc": 0.8785070282482941, "eval_runtime": 683.1403, "eval_samples_per_second": 4.266, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 23577 }, { "epoch": 88.0, "eval_accuracy": 0.31537405628002746, "eval_f1_macro": 0.6936867859001314, "eval_f1_micro": 0.8180521768308028, "eval_loss": 0.12136104702949524, "eval_roc_auc": 0.8787500393049534, "eval_runtime": 674.7599, "eval_samples_per_second": 4.319, "eval_steps_per_second": 0.136, "learning_rate": 0.0001, "step": 23848 }, { "epoch": 88.56, "learning_rate": 0.0001, "loss": 0.1246, "step": 24000 }, { "epoch": 89.0, "eval_accuracy": 0.31056966369251887, "eval_f1_macro": 0.6953152626493269, "eval_f1_micro": 0.8201318053981447, "eval_loss": 0.12058280408382416, "eval_roc_auc": 0.8796589952705502, "eval_runtime": 682.7847, "eval_samples_per_second": 4.268, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 24119 }, { "epoch": 90.0, "eval_accuracy": 0.3164035689773507, "eval_f1_macro": 0.696027478931947, "eval_f1_micro": 0.8213629530649741, "eval_loss": 0.12102781236171722, "eval_roc_auc": 0.8819102943655497, "eval_runtime": 686.1462, "eval_samples_per_second": 4.247, "eval_steps_per_second": 0.134, "learning_rate": 0.0001, "step": 24390 }, { "epoch": 90.41, "learning_rate": 0.0001, "loss": 0.1239, "step": 24500 }, { "epoch": 91.0, "eval_accuracy": 0.31537405628002746, "eval_f1_macro": 0.7005956031917913, "eval_f1_micro": 0.8201800293070964, "eval_loss": 0.1198815330862999, "eval_roc_auc": 0.8804638695161929, "eval_runtime": 683.8668, "eval_samples_per_second": 4.261, "eval_steps_per_second": 0.135, "learning_rate": 0.0001, "step": 24661 }, { "epoch": 92.0, "eval_accuracy": 0.316060398078243, "eval_f1_macro": 0.703909588701163, "eval_f1_micro": 0.8221735718121359, "eval_loss": 0.12077653408050537, "eval_roc_auc": 0.8856479260631466, "eval_runtime": 672.8479, "eval_samples_per_second": 4.331, "eval_steps_per_second": 0.137, "learning_rate": 0.0001, "step": 24932 }, { "epoch": 92.25, "learning_rate": 0.0001, "loss": 0.1238, "step": 25000 }, { "epoch": 93.0, "eval_accuracy": 0.31331503088538093, "eval_f1_macro": 0.7003774810286753, "eval_f1_micro": 0.8199147228492601, "eval_loss": 0.12042003870010376, "eval_roc_auc": 0.8807814566143944, "eval_runtime": 693.4839, "eval_samples_per_second": 4.202, "eval_steps_per_second": 0.133, "learning_rate": 0.0001, "step": 25203 }, { "epoch": 94.0, "eval_accuracy": 0.3143445435827042, "eval_f1_macro": 0.7036330518306199, "eval_f1_micro": 0.8230411686586985, "eval_loss": 0.1200033500790596, "eval_roc_auc": 0.8847080200565651, "eval_runtime": 700.664, "eval_samples_per_second": 4.159, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 25474 }, { "epoch": 94.1, "learning_rate": 0.0001, "loss": 0.1237, "step": 25500 }, { "epoch": 95.0, "eval_accuracy": 0.318805765271105, "eval_f1_macro": 0.7068704040733741, "eval_f1_micro": 0.8209067379143359, "eval_loss": 0.12061866372823715, "eval_roc_auc": 0.8817328760198262, "eval_runtime": 701.3444, "eval_samples_per_second": 4.155, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 25745 }, { "epoch": 95.94, "learning_rate": 0.0001, "loss": 0.1234, "step": 26000 }, { "epoch": 96.0, "eval_accuracy": 0.31468771448181193, "eval_f1_macro": 0.7059911076490224, "eval_f1_micro": 0.8222408026755854, "eval_loss": 0.12005680054426193, "eval_roc_auc": 0.8820439842373163, "eval_runtime": 697.7134, "eval_samples_per_second": 4.177, "eval_steps_per_second": 0.132, "learning_rate": 0.0001, "step": 26016 }, { "epoch": 97.0, "eval_accuracy": 0.30919698009608787, "eval_f1_macro": 0.7073615141822082, "eval_f1_micro": 0.8207813798836243, "eval_loss": 0.12039094418287277, "eval_roc_auc": 0.882995480497798, "eval_runtime": 701.6443, "eval_samples_per_second": 4.153, "eval_steps_per_second": 0.131, "learning_rate": 0.0001, "step": 26287 }, { "epoch": 97.79, "learning_rate": 1e-05, "loss": 0.1215, "step": 26500 }, { "epoch": 98.0, "eval_accuracy": 0.318805765271105, "eval_f1_macro": 0.7125305658957566, "eval_f1_micro": 0.8240848103362597, "eval_loss": 0.12003627419471741, "eval_roc_auc": 0.8859474440267361, "eval_runtime": 703.1401, "eval_samples_per_second": 4.144, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 26558 }, { "epoch": 99.0, "eval_accuracy": 0.31708991077556625, "eval_f1_macro": 0.7126519915135826, "eval_f1_micro": 0.8246739805423309, "eval_loss": 0.11952651292085648, "eval_roc_auc": 0.8863652921241748, "eval_runtime": 704.3933, "eval_samples_per_second": 4.137, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 26829 }, { "epoch": 99.63, "learning_rate": 1e-05, "loss": 0.1208, "step": 27000 }, { "epoch": 100.0, "eval_accuracy": 0.3164035689773507, "eval_f1_macro": 0.7076695321633534, "eval_f1_micro": 0.8225050234427328, "eval_loss": 0.11920821666717529, "eval_roc_auc": 0.8817868272401618, "eval_runtime": 693.6368, "eval_samples_per_second": 4.201, "eval_steps_per_second": 0.133, "learning_rate": 1e-05, "step": 27100 }, { "epoch": 101.0, "eval_accuracy": 0.31708991077556625, "eval_f1_macro": 0.706035453076238, "eval_f1_micro": 0.8232053422370618, "eval_loss": 0.11927199363708496, "eval_roc_auc": 0.8830828349723125, "eval_runtime": 710.5245, "eval_samples_per_second": 4.101, "eval_steps_per_second": 0.129, "learning_rate": 1e-05, "step": 27371 }, { "epoch": 101.48, "learning_rate": 1e-05, "loss": 0.1195, "step": 27500 }, { "epoch": 102.0, "eval_accuracy": 0.31846259437199725, "eval_f1_macro": 0.710532612125401, "eval_f1_micro": 0.8237933039793969, "eval_loss": 0.11972019821405411, "eval_roc_auc": 0.8848494282248152, "eval_runtime": 702.1024, "eval_samples_per_second": 4.15, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 27642 }, { "epoch": 103.0, "eval_accuracy": 0.31400137268359646, "eval_f1_macro": 0.7075672175938867, "eval_f1_micro": 0.8215544737283866, "eval_loss": 0.11914487928152084, "eval_roc_auc": 0.880506256100978, "eval_runtime": 705.9918, "eval_samples_per_second": 4.128, "eval_steps_per_second": 0.13, "learning_rate": 1e-05, "step": 27913 }, { "epoch": 103.32, "learning_rate": 1e-05, "loss": 0.1197, "step": 28000 }, { "epoch": 104.0, "eval_accuracy": 0.32017844886753605, "eval_f1_macro": 0.7063140405914397, "eval_f1_micro": 0.8238771177621446, "eval_loss": 0.11925092339515686, "eval_roc_auc": 0.8842515548583585, "eval_runtime": 693.831, "eval_samples_per_second": 4.2, "eval_steps_per_second": 0.133, "learning_rate": 1e-05, "step": 28184 }, { "epoch": 105.0, "eval_accuracy": 0.3126286890871654, "eval_f1_macro": 0.7070687169886732, "eval_f1_micro": 0.8213039640169827, "eval_loss": 0.11896480619907379, "eval_roc_auc": 0.8798775351669367, "eval_runtime": 704.4845, "eval_samples_per_second": 4.136, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 28455 }, { "epoch": 105.17, "learning_rate": 1e-05, "loss": 0.1189, "step": 28500 }, { "epoch": 106.0, "eval_accuracy": 0.32017844886753605, "eval_f1_macro": 0.7060565909099816, "eval_f1_micro": 0.8232792762746488, "eval_loss": 0.11903885006904602, "eval_roc_auc": 0.8834646985106276, "eval_runtime": 702.4127, "eval_samples_per_second": 4.149, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 28726 }, { "epoch": 107.0, "eval_accuracy": 0.3164035689773507, "eval_f1_macro": 0.7038085792556672, "eval_f1_micro": 0.822364833689862, "eval_loss": 0.1193847730755806, "eval_roc_auc": 0.8811169591371075, "eval_runtime": 704.7837, "eval_samples_per_second": 4.135, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 28997 }, { "epoch": 107.01, "learning_rate": 1e-05, "loss": 0.1194, "step": 29000 }, { "epoch": 108.0, "eval_accuracy": 0.3191489361702128, "eval_f1_macro": 0.7110378528873161, "eval_f1_micro": 0.8232445520581114, "eval_loss": 0.11912781000137329, "eval_roc_auc": 0.883028832515277, "eval_runtime": 712.626, "eval_samples_per_second": 4.089, "eval_steps_per_second": 0.129, "learning_rate": 1e-05, "step": 29268 }, { "epoch": 108.86, "learning_rate": 1e-05, "loss": 0.1187, "step": 29500 }, { "epoch": 109.0, "eval_accuracy": 0.317433081674674, "eval_f1_macro": 0.7101233565606322, "eval_f1_micro": 0.8229869639937963, "eval_loss": 0.11885793507099152, "eval_roc_auc": 0.8816620950806512, "eval_runtime": 704.8781, "eval_samples_per_second": 4.134, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 29539 }, { "epoch": 110.0, "eval_accuracy": 0.316060398078243, "eval_f1_macro": 0.7043927351077296, "eval_f1_micro": 0.8223593100843511, "eval_loss": 0.11920594424009323, "eval_roc_auc": 0.8809626700624849, "eval_runtime": 698.225, "eval_samples_per_second": 4.173, "eval_steps_per_second": 0.132, "learning_rate": 1e-05, "step": 29810 }, { "epoch": 110.7, "learning_rate": 1e-05, "loss": 0.1185, "step": 30000 }, { "epoch": 111.0, "eval_accuracy": 0.317433081674674, "eval_f1_macro": 0.7082791431366098, "eval_f1_micro": 0.8226405643444505, "eval_loss": 0.1191883385181427, "eval_roc_auc": 0.8827318446199321, "eval_runtime": 708.9619, "eval_samples_per_second": 4.11, "eval_steps_per_second": 0.13, "learning_rate": 1e-05, "step": 30081 }, { "epoch": 112.0, "eval_accuracy": 0.3205216197666438, "eval_f1_macro": 0.7093446393354451, "eval_f1_micro": 0.8239410221167062, "eval_loss": 0.11904006451368332, "eval_roc_auc": 0.8841101210717583, "eval_runtime": 709.1728, "eval_samples_per_second": 4.109, "eval_steps_per_second": 0.13, "learning_rate": 1e-05, "step": 30352 }, { "epoch": 112.55, "learning_rate": 1e-05, "loss": 0.119, "step": 30500 }, { "epoch": 113.0, "eval_accuracy": 0.31708991077556625, "eval_f1_macro": 0.708022770004008, "eval_f1_micro": 0.8232849960526862, "eval_loss": 0.11945341527462006, "eval_roc_auc": 0.8844778642879284, "eval_runtime": 708.0786, "eval_samples_per_second": 4.115, "eval_steps_per_second": 0.13, "learning_rate": 1e-05, "step": 30623 }, { "epoch": 114.0, "eval_accuracy": 0.3181194234728895, "eval_f1_macro": 0.7061751648131201, "eval_f1_micro": 0.82203889216264, "eval_loss": 0.11900585889816284, "eval_roc_auc": 0.8798916554890817, "eval_runtime": 701.8292, "eval_samples_per_second": 4.152, "eval_steps_per_second": 0.131, "learning_rate": 1e-05, "step": 30894 }, { "epoch": 114.39, "learning_rate": 1e-05, "loss": 0.1182, "step": 31000 }, { "epoch": 115.0, "eval_accuracy": 0.317433081674674, "eval_f1_macro": 0.7081036238174518, "eval_f1_micro": 0.8228996779994145, "eval_loss": 0.11918609589338303, "eval_roc_auc": 0.8823203985481548, "eval_runtime": 711.6073, "eval_samples_per_second": 4.095, "eval_steps_per_second": 0.129, "learning_rate": 1e-05, "step": 31165 }, { "epoch": 116.0, "eval_accuracy": 0.32498284145504464, "eval_f1_macro": 0.7128134624321966, "eval_f1_micro": 0.8255601659751037, "eval_loss": 0.11899947375059128, "eval_roc_auc": 0.8861634026181779, "eval_runtime": 692.0205, "eval_samples_per_second": 4.211, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 31436 }, { "epoch": 116.24, "learning_rate": 1.0000000000000002e-06, "loss": 0.1191, "step": 31500 }, { "epoch": 117.0, "eval_accuracy": 0.31708991077556625, "eval_f1_macro": 0.7103576429034303, "eval_f1_micro": 0.8231059020510674, "eval_loss": 0.11870068311691284, "eval_roc_auc": 0.8821442452365535, "eval_runtime": 704.8278, "eval_samples_per_second": 4.134, "eval_steps_per_second": 0.131, "learning_rate": 1.0000000000000002e-06, "step": 31707 }, { "epoch": 118.0, "eval_accuracy": 0.31983527796842826, "eval_f1_macro": 0.7061058223171173, "eval_f1_micro": 0.8235982284616027, "eval_loss": 0.11885705590248108, "eval_roc_auc": 0.8829722487532972, "eval_runtime": 702.381, "eval_samples_per_second": 4.149, "eval_steps_per_second": 0.131, "learning_rate": 1.0000000000000002e-06, "step": 31978 }, { "epoch": 118.08, "learning_rate": 1.0000000000000002e-06, "loss": 0.1179, "step": 32000 }, { "epoch": 119.0, "eval_accuracy": 0.3181194234728895, "eval_f1_macro": 0.7080350612891082, "eval_f1_micro": 0.8233181609387398, "eval_loss": 0.11888550966978073, "eval_roc_auc": 0.882981257702253, "eval_runtime": 695.5592, "eval_samples_per_second": 4.189, "eval_steps_per_second": 0.132, "learning_rate": 1.0000000000000002e-06, "step": 32249 }, { "epoch": 119.93, "learning_rate": 1.0000000000000002e-06, "loss": 0.1176, "step": 32500 }, { "epoch": 120.0, "eval_accuracy": 0.31846259437199725, "eval_f1_macro": 0.710061474614843, "eval_f1_micro": 0.8238579526508836, "eval_loss": 0.11897823214530945, "eval_roc_auc": 0.883849117735537, "eval_runtime": 696.3539, "eval_samples_per_second": 4.185, "eval_steps_per_second": 0.132, "learning_rate": 1.0000000000000002e-06, "step": 32520 }, { "epoch": 121.0, "eval_accuracy": 0.3208647906657515, "eval_f1_macro": 0.7127667221711917, "eval_f1_micro": 0.8254309453929147, "eval_loss": 0.11947356164455414, "eval_roc_auc": 0.8872447167930093, "eval_runtime": 693.3862, "eval_samples_per_second": 4.203, "eval_steps_per_second": 0.133, "learning_rate": 1.0000000000000002e-06, "step": 32791 }, { "epoch": 121.77, "learning_rate": 1.0000000000000002e-06, "loss": 0.1175, "step": 33000 }, { "epoch": 122.0, "eval_accuracy": 0.31537405628002746, "eval_f1_macro": 0.7048262553703367, "eval_f1_micro": 0.8222669349429913, "eval_loss": 0.11920282989740372, "eval_roc_auc": 0.8812519652796962, "eval_runtime": 684.1671, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.134, "learning_rate": 1.0000000000000002e-06, "step": 33062 }, { "epoch": 123.0, "eval_accuracy": 0.3212079615648593, "eval_f1_macro": 0.7154256602927447, "eval_f1_micro": 0.8255195344970907, "eval_loss": 0.1192421168088913, "eval_roc_auc": 0.8856336776492516, "eval_runtime": 707.8447, "eval_samples_per_second": 4.117, "eval_steps_per_second": 0.13, "learning_rate": 1.0000000000000002e-06, "step": 33333 }, { "epoch": 123.62, "learning_rate": 1.0000000000000002e-07, "loss": 0.1176, "step": 33500 }, { "epoch": 124.0, "eval_accuracy": 0.3208647906657515, "eval_f1_macro": 0.7109465417900758, "eval_f1_micro": 0.8238532110091744, "eval_loss": 0.11888780444860458, "eval_roc_auc": 0.8836948286609143, "eval_runtime": 715.7035, "eval_samples_per_second": 4.072, "eval_steps_per_second": 0.129, "learning_rate": 1.0000000000000002e-07, "step": 33604 }, { "epoch": 125.0, "eval_accuracy": 0.3225806451612903, "eval_f1_macro": 0.7101964285625825, "eval_f1_micro": 0.8251958006998834, "eval_loss": 0.11894452571868896, "eval_roc_auc": 0.8847439533789889, "eval_runtime": 706.7139, "eval_samples_per_second": 4.123, "eval_steps_per_second": 0.13, "learning_rate": 1.0000000000000002e-07, "step": 33875 }, { "epoch": 125.46, "learning_rate": 1.0000000000000002e-07, "loss": 0.1179, "step": 34000 }, { "epoch": 126.0, "eval_accuracy": 0.3164035689773507, "eval_f1_macro": 0.702507166276965, "eval_f1_micro": 0.8206187305066173, "eval_loss": 0.11890433728694916, "eval_roc_auc": 0.8787088152808908, "eval_runtime": 705.4083, "eval_samples_per_second": 4.131, "eval_steps_per_second": 0.13, "learning_rate": 1.0000000000000002e-07, "step": 34146 }, { "epoch": 127.0, "eval_accuracy": 0.32155113246396705, "eval_f1_macro": 0.7103986965520878, "eval_f1_micro": 0.8245357813477989, "eval_loss": 0.11901579052209854, "eval_roc_auc": 0.8838838116421884, "eval_runtime": 704.1405, "eval_samples_per_second": 4.138, "eval_steps_per_second": 0.131, "learning_rate": 1.0000000000000002e-07, "step": 34417 }, { "epoch": 127.0, "learning_rate": 1.0000000000000002e-07, "step": 34417, "total_flos": 1.6323301632319567e+21, "train_loss": 0.0713793940258306, "train_runtime": 211000.3165, "train_samples_per_second": 6.162, "train_steps_per_second": 0.193 } ], "logging_steps": 500, "max_steps": 40650, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "total_flos": 1.6323301632319567e+21, "train_batch_size": 32, "trial_name": null, "trial_params": null }