{ "best_metric": 0.1039666160941124, "best_model_checkpoint": "./dino-base-2023_11_27-with_custom_head/checkpoint-43952", "epoch": 90.0, "eval_steps": 500, "global_step": 48240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.93, "learning_rate": 0.01, "loss": 0.2471, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.39442658092175775, "eval_f1_macro": 0.40064106475011957, "eval_f1_micro": 0.5684482898035094, "eval_loss": 0.2136440873146057, "eval_roc_auc": 0.7020299868465498, "eval_runtime": 648.3221, "eval_samples_per_second": 4.317, "eval_steps_per_second": 0.27, "learning_rate": 0.01, "step": 536 }, { "epoch": 1.87, "learning_rate": 0.01, "loss": 0.2208, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.4090746695248303, "eval_f1_macro": 0.646218183693934, "eval_f1_micro": 0.6908665105386417, "eval_loss": 0.21994435787200928, "eval_roc_auc": 0.7944947312416977, "eval_runtime": 649.3168, "eval_samples_per_second": 4.311, "eval_steps_per_second": 0.27, "learning_rate": 0.01, "step": 1072 }, { "epoch": 2.8, "learning_rate": 0.01, "loss": 0.2181, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.43122543765630583, "eval_f1_macro": 0.5770940809584216, "eval_f1_micro": 0.6712328767123288, "eval_loss": 0.19793672859668732, "eval_roc_auc": 0.7666801441240528, "eval_runtime": 645.3042, "eval_samples_per_second": 4.337, "eval_steps_per_second": 0.271, "learning_rate": 0.01, "step": 1608 }, { "epoch": 3.73, "learning_rate": 0.01, "loss": 0.2187, "step": 2000 }, { "epoch": 4.0, "eval_accuracy": 0.4137191854233655, "eval_f1_macro": 0.606113442290164, "eval_f1_micro": 0.7318466985527052, "eval_loss": 0.1766517609357834, "eval_roc_auc": 0.8268705246501159, "eval_runtime": 668.3118, "eval_samples_per_second": 4.188, "eval_steps_per_second": 0.262, "learning_rate": 0.01, "step": 2144 }, { "epoch": 4.66, "learning_rate": 0.01, "loss": 0.2128, "step": 2500 }, { "epoch": 5.0, "eval_accuracy": 0.43265451947123973, "eval_f1_macro": 0.5526692766303807, "eval_f1_micro": 0.7173007124613524, "eval_loss": 0.17600227892398834, "eval_roc_auc": 0.8011887560107374, "eval_runtime": 647.0188, "eval_samples_per_second": 4.326, "eval_steps_per_second": 0.27, "learning_rate": 0.01, "step": 2680 }, { "epoch": 5.6, "learning_rate": 0.01, "loss": 0.2171, "step": 3000 }, { "epoch": 6.0, "eval_accuracy": 0.4226509467667024, "eval_f1_macro": 0.5183527283365527, "eval_f1_micro": 0.7108839135498899, "eval_loss": 0.1992170512676239, "eval_roc_auc": 0.8061565099276957, "eval_runtime": 650.6673, "eval_samples_per_second": 4.302, "eval_steps_per_second": 0.269, "learning_rate": 0.01, "step": 3216 }, { "epoch": 6.53, "learning_rate": 0.01, "loss": 0.2108, "step": 3500 }, { "epoch": 7.0, "eval_accuracy": 0.41729188996070027, "eval_f1_macro": 0.6054999995580052, "eval_f1_micro": 0.7338740337092892, "eval_loss": 0.16948619484901428, "eval_roc_auc": 0.8232094683978447, "eval_runtime": 649.3312, "eval_samples_per_second": 4.311, "eval_steps_per_second": 0.27, "learning_rate": 0.01, "step": 3752 }, { "epoch": 7.46, "learning_rate": 0.01, "loss": 0.2147, "step": 4000 }, { "epoch": 8.0, "eval_accuracy": 0.4226509467667024, "eval_f1_macro": 0.6176021212641017, "eval_f1_micro": 0.7440822902466228, "eval_loss": 0.16185873746871948, "eval_roc_auc": 0.8341874074405569, "eval_runtime": 663.7112, "eval_samples_per_second": 4.217, "eval_steps_per_second": 0.264, "learning_rate": 0.01, "step": 4288 }, { "epoch": 8.4, "learning_rate": 0.01, "loss": 0.2112, "step": 4500 }, { "epoch": 9.0, "eval_accuracy": 0.43086816720257237, "eval_f1_macro": 0.6402287128969387, "eval_f1_micro": 0.7336987336987336, "eval_loss": 0.17079614102840424, "eval_roc_auc": 0.8244850070361233, "eval_runtime": 656.7931, "eval_samples_per_second": 4.262, "eval_steps_per_second": 0.266, "learning_rate": 0.01, "step": 4824 }, { "epoch": 9.33, "learning_rate": 0.01, "loss": 0.216, "step": 5000 }, { "epoch": 10.0, "eval_accuracy": 0.42836727402643804, "eval_f1_macro": 0.643393584513474, "eval_f1_micro": 0.7493341591824095, "eval_loss": 0.1750514954328537, "eval_roc_auc": 0.83718619371997, "eval_runtime": 657.1366, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.266, "learning_rate": 0.01, "step": 5360 }, { "epoch": 10.26, "learning_rate": 0.01, "loss": 0.2151, "step": 5500 }, { "epoch": 11.0, "eval_accuracy": 0.43372633083244017, "eval_f1_macro": 0.6292914445814428, "eval_f1_micro": 0.7241767706883986, "eval_loss": 0.17014054954051971, "eval_roc_auc": 0.8078434001509203, "eval_runtime": 662.7207, "eval_samples_per_second": 4.223, "eval_steps_per_second": 0.264, "learning_rate": 0.01, "step": 5896 }, { "epoch": 11.19, "learning_rate": 0.01, "loss": 0.213, "step": 6000 }, { "epoch": 12.0, "eval_accuracy": 0.3565559128260093, "eval_f1_macro": 0.5371100777543761, "eval_f1_micro": 0.6629161350191394, "eval_loss": 0.2409001588821411, "eval_roc_auc": 0.8034829311226885, "eval_runtime": 657.3534, "eval_samples_per_second": 4.258, "eval_steps_per_second": 0.266, "learning_rate": 0.01, "step": 6432 }, { "epoch": 12.13, "learning_rate": 0.01, "loss": 0.2161, "step": 6500 }, { "epoch": 13.0, "eval_accuracy": 0.4194355126831011, "eval_f1_macro": 0.6243034191518838, "eval_f1_micro": 0.7290690310322989, "eval_loss": 0.1704823523759842, "eval_roc_auc": 0.820743148916754, "eval_runtime": 665.336, "eval_samples_per_second": 4.207, "eval_steps_per_second": 0.263, "learning_rate": 0.01, "step": 6968 }, { "epoch": 13.06, "learning_rate": 0.01, "loss": 0.2145, "step": 7000 }, { "epoch": 13.99, "learning_rate": 0.01, "loss": 0.2136, "step": 7500 }, { "epoch": 14.0, "eval_accuracy": 0.44265809217577706, "eval_f1_macro": 0.6215367292632343, "eval_f1_micro": 0.734259379462547, "eval_loss": 0.16933664679527283, "eval_roc_auc": 0.8177495297062425, "eval_runtime": 654.4339, "eval_samples_per_second": 4.277, "eval_steps_per_second": 0.267, "learning_rate": 0.01, "step": 7504 }, { "epoch": 14.93, "learning_rate": 0.001, "loss": 0.1826, "step": 8000 }, { "epoch": 15.0, "eval_accuracy": 0.4969632011432655, "eval_f1_macro": 0.7459554779815883, "eval_f1_micro": 0.7967720076387605, "eval_loss": 0.13880470395088196, "eval_roc_auc": 0.8648126874646008, "eval_runtime": 668.3546, "eval_samples_per_second": 4.188, "eval_steps_per_second": 0.262, "learning_rate": 0.001, "step": 8040 }, { "epoch": 15.86, "learning_rate": 0.001, "loss": 0.1731, "step": 8500 }, { "epoch": 16.0, "eval_accuracy": 0.5155412647374062, "eval_f1_macro": 0.7385190479208986, "eval_f1_micro": 0.8016270337922403, "eval_loss": 0.14762958884239197, "eval_roc_auc": 0.8631052728177114, "eval_runtime": 657.5159, "eval_samples_per_second": 4.257, "eval_steps_per_second": 0.266, "learning_rate": 0.001, "step": 8576 }, { "epoch": 16.79, "learning_rate": 0.001, "loss": 0.1649, "step": 9000 }, { "epoch": 17.0, "eval_accuracy": 0.5023222579492675, "eval_f1_macro": 0.7692855132157861, "eval_f1_micro": 0.813318908522659, "eval_loss": 0.1351146697998047, "eval_roc_auc": 0.8811667807242409, "eval_runtime": 655.4487, "eval_samples_per_second": 4.27, "eval_steps_per_second": 0.267, "learning_rate": 0.001, "step": 9112 }, { "epoch": 17.72, "learning_rate": 0.001, "loss": 0.1624, "step": 9500 }, { "epoch": 18.0, "eval_accuracy": 0.5212575919971418, "eval_f1_macro": 0.7714136364578609, "eval_f1_micro": 0.8185593067340675, "eval_loss": 0.13848340511322021, "eval_roc_auc": 0.8837626029149266, "eval_runtime": 662.3967, "eval_samples_per_second": 4.226, "eval_steps_per_second": 0.264, "learning_rate": 0.001, "step": 9648 }, { "epoch": 18.66, "learning_rate": 0.001, "loss": 0.1576, "step": 10000 }, { "epoch": 19.0, "eval_accuracy": 0.5205430510896749, "eval_f1_macro": 0.7631806100991755, "eval_f1_micro": 0.8175969609705288, "eval_loss": 0.13018544018268585, "eval_roc_auc": 0.8779148602729905, "eval_runtime": 679.9157, "eval_samples_per_second": 4.117, "eval_steps_per_second": 0.257, "learning_rate": 0.001, "step": 10184 }, { "epoch": 19.59, "learning_rate": 0.001, "loss": 0.1544, "step": 10500 }, { "epoch": 20.0, "eval_accuracy": 0.5319757056091461, "eval_f1_macro": 0.7721701876863485, "eval_f1_micro": 0.8229907814143413, "eval_loss": 0.12343110144138336, "eval_roc_auc": 0.8780568409783432, "eval_runtime": 658.9399, "eval_samples_per_second": 4.248, "eval_steps_per_second": 0.266, "learning_rate": 0.001, "step": 10720 }, { "epoch": 20.52, "learning_rate": 0.001, "loss": 0.1542, "step": 11000 }, { "epoch": 21.0, "eval_accuracy": 0.525902107895677, "eval_f1_macro": 0.7754587726434372, "eval_f1_micro": 0.8267645466032855, "eval_loss": 0.13044790923595428, "eval_roc_auc": 0.8884477411328827, "eval_runtime": 659.4337, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.265, "learning_rate": 0.001, "step": 11256 }, { "epoch": 21.46, "learning_rate": 0.001, "loss": 0.1525, "step": 11500 }, { "epoch": 22.0, "eval_accuracy": 0.5376920328688818, "eval_f1_macro": 0.76461964189607, "eval_f1_micro": 0.8176823176823177, "eval_loss": 0.12196117639541626, "eval_roc_auc": 0.8724977198539325, "eval_runtime": 674.7285, "eval_samples_per_second": 4.148, "eval_steps_per_second": 0.259, "learning_rate": 0.001, "step": 11792 }, { "epoch": 22.39, "learning_rate": 0.001, "loss": 0.1505, "step": 12000 }, { "epoch": 23.0, "eval_accuracy": 0.5255448374419436, "eval_f1_macro": 0.7872363331907107, "eval_f1_micro": 0.8264049955396968, "eval_loss": 0.13109837472438812, "eval_roc_auc": 0.891814004155335, "eval_runtime": 658.8901, "eval_samples_per_second": 4.248, "eval_steps_per_second": 0.266, "learning_rate": 0.001, "step": 12328 }, { "epoch": 23.32, "learning_rate": 0.001, "loss": 0.1515, "step": 12500 }, { "epoch": 24.0, "eval_accuracy": 0.5316184351554126, "eval_f1_macro": 0.7629977302664256, "eval_f1_micro": 0.8188446438586652, "eval_loss": 0.12468679994344711, "eval_roc_auc": 0.8736890703040258, "eval_runtime": 649.926, "eval_samples_per_second": 4.307, "eval_steps_per_second": 0.269, "learning_rate": 0.001, "step": 12864 }, { "epoch": 24.25, "learning_rate": 0.001, "loss": 0.1471, "step": 13000 }, { "epoch": 25.0, "eval_accuracy": 0.5226866738120758, "eval_f1_macro": 0.7726394339166129, "eval_f1_micro": 0.8258118937157316, "eval_loss": 0.1264602392911911, "eval_roc_auc": 0.8875076814616129, "eval_runtime": 661.9223, "eval_samples_per_second": 4.229, "eval_steps_per_second": 0.264, "learning_rate": 0.001, "step": 13400 }, { "epoch": 25.19, "learning_rate": 0.001, "loss": 0.1475, "step": 13500 }, { "epoch": 26.0, "eval_accuracy": 0.5330475169703466, "eval_f1_macro": 0.7834057788273457, "eval_f1_micro": 0.8301059554256485, "eval_loss": 0.12767212092876434, "eval_roc_auc": 0.8867279830614062, "eval_runtime": 654.8587, "eval_samples_per_second": 4.274, "eval_steps_per_second": 0.267, "learning_rate": 0.001, "step": 13936 }, { "epoch": 26.12, "learning_rate": 0.001, "loss": 0.1484, "step": 14000 }, { "epoch": 27.0, "eval_accuracy": 0.5398356555912825, "eval_f1_macro": 0.7661233818608114, "eval_f1_micro": 0.8218290555693994, "eval_loss": 0.12360195070505142, "eval_roc_auc": 0.8754121833739898, "eval_runtime": 658.0747, "eval_samples_per_second": 4.253, "eval_steps_per_second": 0.266, "learning_rate": 0.001, "step": 14472 }, { "epoch": 27.05, "learning_rate": 0.001, "loss": 0.1475, "step": 14500 }, { "epoch": 27.99, "learning_rate": 0.001, "loss": 0.1472, "step": 15000 }, { "epoch": 28.0, "eval_accuracy": 0.5401929260450161, "eval_f1_macro": 0.7728524137767288, "eval_f1_micro": 0.8228913409388442, "eval_loss": 0.1256585270166397, "eval_roc_auc": 0.8757945197904818, "eval_runtime": 655.1438, "eval_samples_per_second": 4.272, "eval_steps_per_second": 0.267, "learning_rate": 0.001, "step": 15008 }, { "epoch": 28.92, "learning_rate": 0.0001, "loss": 0.1379, "step": 15500 }, { "epoch": 29.0, "eval_accuracy": 0.5451947123972848, "eval_f1_macro": 0.7891009081675242, "eval_f1_micro": 0.8352094482376824, "eval_loss": 0.11994459480047226, "eval_roc_auc": 0.8865305601939816, "eval_runtime": 657.6068, "eval_samples_per_second": 4.256, "eval_steps_per_second": 0.266, "learning_rate": 0.0001, "step": 15544 }, { "epoch": 29.85, "learning_rate": 0.0001, "loss": 0.1349, "step": 16000 }, { "epoch": 30.0, "eval_accuracy": 0.5487674169346195, "eval_f1_macro": 0.7943922901606704, "eval_f1_micro": 0.841315916787615, "eval_loss": 0.11564121395349503, "eval_roc_auc": 0.895138845939546, "eval_runtime": 659.4245, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.265, "learning_rate": 0.0001, "step": 16080 }, { "epoch": 30.78, "learning_rate": 0.0001, "loss": 0.1326, "step": 16500 }, { "epoch": 31.0, "eval_accuracy": 0.5498392282958199, "eval_f1_macro": 0.7983364672353336, "eval_f1_micro": 0.84037558685446, "eval_loss": 0.11515345424413681, "eval_roc_auc": 0.8960665621862288, "eval_runtime": 652.6109, "eval_samples_per_second": 4.289, "eval_steps_per_second": 0.268, "learning_rate": 0.0001, "step": 16616 }, { "epoch": 31.72, "learning_rate": 0.0001, "loss": 0.1321, "step": 17000 }, { "epoch": 32.0, "eval_accuracy": 0.5509110396570204, "eval_f1_macro": 0.7911364775051637, "eval_f1_micro": 0.8385913426265589, "eval_loss": 0.11371538788080215, "eval_roc_auc": 0.89024940896342, "eval_runtime": 663.8067, "eval_samples_per_second": 4.217, "eval_steps_per_second": 0.264, "learning_rate": 0.0001, "step": 17152 }, { "epoch": 32.65, "learning_rate": 0.0001, "loss": 0.1294, "step": 17500 }, { "epoch": 33.0, "eval_accuracy": 0.5512683101107538, "eval_f1_macro": 0.7924466456431516, "eval_f1_micro": 0.8406133545115768, "eval_loss": 0.11363548040390015, "eval_roc_auc": 0.891620070857568, "eval_runtime": 641.5792, "eval_samples_per_second": 4.363, "eval_steps_per_second": 0.273, "learning_rate": 0.0001, "step": 17688 }, { "epoch": 33.58, "learning_rate": 0.0001, "loss": 0.1297, "step": 18000 }, { "epoch": 34.0, "eval_accuracy": 0.5551982851018221, "eval_f1_macro": 0.7994952163993189, "eval_f1_micro": 0.8438879816125325, "eval_loss": 0.1100151389837265, "eval_roc_auc": 0.8965325171836247, "eval_runtime": 666.5011, "eval_samples_per_second": 4.2, "eval_steps_per_second": 0.263, "learning_rate": 0.0001, "step": 18224 }, { "epoch": 34.51, "learning_rate": 0.0001, "loss": 0.1296, "step": 18500 }, { "epoch": 35.0, "eval_accuracy": 0.5569846373704894, "eval_f1_macro": 0.7958547104209327, "eval_f1_micro": 0.8430563978168587, "eval_loss": 0.11022897809743881, "eval_roc_auc": 0.8952561397957243, "eval_runtime": 651.2057, "eval_samples_per_second": 4.298, "eval_steps_per_second": 0.269, "learning_rate": 0.0001, "step": 18760 }, { "epoch": 35.45, "learning_rate": 0.0001, "loss": 0.1276, "step": 19000 }, { "epoch": 36.0, "eval_accuracy": 0.5584137191854234, "eval_f1_macro": 0.7954026209465306, "eval_f1_micro": 0.8428501708150318, "eval_loss": 0.11038191616535187, "eval_roc_auc": 0.8933002510838177, "eval_runtime": 659.4359, "eval_samples_per_second": 4.245, "eval_steps_per_second": 0.265, "learning_rate": 0.0001, "step": 19296 }, { "epoch": 36.38, "learning_rate": 0.0001, "loss": 0.1264, "step": 19500 }, { "epoch": 37.0, "eval_accuracy": 0.5644873168988924, "eval_f1_macro": 0.8073166114826349, "eval_f1_micro": 0.8467775842392937, "eval_loss": 0.11108729988336563, "eval_roc_auc": 0.9003547734565809, "eval_runtime": 649.1518, "eval_samples_per_second": 4.312, "eval_steps_per_second": 0.27, "learning_rate": 0.0001, "step": 19832 }, { "epoch": 37.31, "learning_rate": 0.0001, "loss": 0.1279, "step": 20000 }, { "epoch": 38.0, "eval_accuracy": 0.5662736691675598, "eval_f1_macro": 0.8059718169069215, "eval_f1_micro": 0.8457475869604808, "eval_loss": 0.11050034314393997, "eval_roc_auc": 0.8964483289954395, "eval_runtime": 661.6908, "eval_samples_per_second": 4.23, "eval_steps_per_second": 0.264, "learning_rate": 0.0001, "step": 20368 }, { "epoch": 38.25, "learning_rate": 0.0001, "loss": 0.1231, "step": 20500 }, { "epoch": 39.0, "eval_accuracy": 0.5623436941764915, "eval_f1_macro": 0.810452280144873, "eval_f1_micro": 0.8481141692150868, "eval_loss": 0.1114969253540039, "eval_roc_auc": 0.9016441984475202, "eval_runtime": 645.0365, "eval_samples_per_second": 4.339, "eval_steps_per_second": 0.271, "learning_rate": 0.0001, "step": 20904 }, { "epoch": 39.18, "learning_rate": 0.0001, "loss": 0.1276, "step": 21000 }, { "epoch": 40.0, "eval_accuracy": 0.564844587352626, "eval_f1_macro": 0.7998757908744976, "eval_f1_micro": 0.8442948914040991, "eval_loss": 0.10886894911527634, "eval_roc_auc": 0.8932229387863774, "eval_runtime": 664.3024, "eval_samples_per_second": 4.213, "eval_steps_per_second": 0.263, "learning_rate": 0.0001, "step": 21440 }, { "epoch": 40.11, "learning_rate": 0.0001, "loss": 0.121, "step": 21500 }, { "epoch": 41.0, "eval_accuracy": 0.5627009646302251, "eval_f1_macro": 0.8014681171854418, "eval_f1_micro": 0.8454512239678481, "eval_loss": 0.10981705039739609, "eval_roc_auc": 0.8953198348828445, "eval_runtime": 652.9462, "eval_samples_per_second": 4.287, "eval_steps_per_second": 0.268, "learning_rate": 0.0001, "step": 21976 }, { "epoch": 41.04, "learning_rate": 0.0001, "loss": 0.1241, "step": 22000 }, { "epoch": 41.98, "learning_rate": 0.0001, "loss": 0.1229, "step": 22500 }, { "epoch": 42.0, "eval_accuracy": 0.5619864237227581, "eval_f1_macro": 0.8009464266100834, "eval_f1_micro": 0.8459390554813646, "eval_loss": 0.10872387140989304, "eval_roc_auc": 0.8965747458488879, "eval_runtime": 655.0026, "eval_samples_per_second": 4.273, "eval_steps_per_second": 0.267, "learning_rate": 0.0001, "step": 22512 }, { "epoch": 42.91, "learning_rate": 0.0001, "loss": 0.1227, "step": 23000 }, { "epoch": 43.0, "eval_accuracy": 0.5687745623436942, "eval_f1_macro": 0.8067169938735436, "eval_f1_micro": 0.8468292682926829, "eval_loss": 0.10882638394832611, "eval_roc_auc": 0.8956753719148074, "eval_runtime": 660.8489, "eval_samples_per_second": 4.235, "eval_steps_per_second": 0.265, "learning_rate": 0.0001, "step": 23048 }, { "epoch": 43.84, "learning_rate": 0.0001, "loss": 0.1221, "step": 23500 }, { "epoch": 44.0, "eval_accuracy": 0.5673454805287603, "eval_f1_macro": 0.8065744332804647, "eval_f1_micro": 0.8476346632659257, "eval_loss": 0.10762665420770645, "eval_roc_auc": 0.897394560379003, "eval_runtime": 657.9808, "eval_samples_per_second": 4.254, "eval_steps_per_second": 0.266, "learning_rate": 0.0001, "step": 23584 }, { "epoch": 44.78, "learning_rate": 0.0001, "loss": 0.1191, "step": 24000 }, { "epoch": 45.0, "eval_accuracy": 0.5698463737048947, "eval_f1_macro": 0.817339787108748, "eval_f1_micro": 0.8507955568898229, "eval_loss": 0.10689569264650345, "eval_roc_auc": 0.9027498177813545, "eval_runtime": 671.4032, "eval_samples_per_second": 4.169, "eval_steps_per_second": 0.261, "learning_rate": 0.0001, "step": 24120 }, { "epoch": 45.71, "learning_rate": 0.0001, "loss": 0.1212, "step": 24500 }, { "epoch": 46.0, "eval_accuracy": 0.563415505537692, "eval_f1_macro": 0.8173548628519673, "eval_f1_micro": 0.8508932256352808, "eval_loss": 0.10721632838249207, "eval_roc_auc": 0.9086343215547117, "eval_runtime": 661.9534, "eval_samples_per_second": 4.228, "eval_steps_per_second": 0.264, "learning_rate": 0.0001, "step": 24656 }, { "epoch": 46.64, "learning_rate": 0.0001, "loss": 0.1198, "step": 25000 }, { "epoch": 47.0, "eval_accuracy": 0.5687745623436942, "eval_f1_macro": 0.8090465345914333, "eval_f1_micro": 0.849293563579278, "eval_loss": 0.10663535445928574, "eval_roc_auc": 0.900130416633457, "eval_runtime": 673.8512, "eval_samples_per_second": 4.154, "eval_steps_per_second": 0.26, "learning_rate": 0.0001, "step": 25192 }, { "epoch": 47.57, "learning_rate": 0.0001, "loss": 0.1201, "step": 25500 }, { "epoch": 48.0, "eval_accuracy": 0.5652018578063595, "eval_f1_macro": 0.8082582901487069, "eval_f1_micro": 0.8484811957569913, "eval_loss": 0.10762892663478851, "eval_roc_auc": 0.9002407000277199, "eval_runtime": 662.4725, "eval_samples_per_second": 4.225, "eval_steps_per_second": 0.264, "learning_rate": 0.0001, "step": 25728 }, { "epoch": 48.51, "learning_rate": 0.0001, "loss": 0.1189, "step": 26000 }, { "epoch": 49.0, "eval_accuracy": 0.5687745623436942, "eval_f1_macro": 0.8152048365492516, "eval_f1_micro": 0.8507597141312836, "eval_loss": 0.10654111951589584, "eval_roc_auc": 0.9026532861685821, "eval_runtime": 679.062, "eval_samples_per_second": 4.122, "eval_steps_per_second": 0.258, "learning_rate": 0.0001, "step": 26264 }, { "epoch": 49.44, "learning_rate": 0.0001, "loss": 0.1176, "step": 26500 }, { "epoch": 50.0, "eval_accuracy": 0.563415505537692, "eval_f1_macro": 0.8033757987633436, "eval_f1_micro": 0.8462940461725396, "eval_loss": 0.10730718821287155, "eval_roc_auc": 0.8965096424048645, "eval_runtime": 643.0995, "eval_samples_per_second": 4.352, "eval_steps_per_second": 0.272, "learning_rate": 0.0001, "step": 26800 }, { "epoch": 50.37, "learning_rate": 0.0001, "loss": 0.1202, "step": 27000 }, { "epoch": 51.0, "eval_accuracy": 0.5727045373347625, "eval_f1_macro": 0.8102272806912906, "eval_f1_micro": 0.8480676328502416, "eval_loss": 0.10732194036245346, "eval_roc_auc": 0.8994002578253693, "eval_runtime": 645.0782, "eval_samples_per_second": 4.339, "eval_steps_per_second": 0.271, "learning_rate": 0.0001, "step": 27336 }, { "epoch": 51.31, "learning_rate": 0.0001, "loss": 0.1167, "step": 27500 }, { "epoch": 52.0, "eval_accuracy": 0.571632725973562, "eval_f1_macro": 0.8179079350328715, "eval_f1_micro": 0.8521687462863933, "eval_loss": 0.10600127279758453, "eval_roc_auc": 0.9068707918898985, "eval_runtime": 661.3351, "eval_samples_per_second": 4.232, "eval_steps_per_second": 0.265, "learning_rate": 0.0001, "step": 27872 }, { "epoch": 52.24, "learning_rate": 0.0001, "loss": 0.1192, "step": 28000 }, { "epoch": 53.0, "eval_accuracy": 0.5712754555198285, "eval_f1_macro": 0.8127939311574505, "eval_f1_micro": 0.8507093268940539, "eval_loss": 0.10628383606672287, "eval_roc_auc": 0.9009869922949931, "eval_runtime": 649.2151, "eval_samples_per_second": 4.311, "eval_steps_per_second": 0.27, "learning_rate": 0.0001, "step": 28408 }, { "epoch": 53.17, "learning_rate": 0.0001, "loss": 0.1156, "step": 28500 }, { "epoch": 54.0, "eval_accuracy": 0.5719899964272954, "eval_f1_macro": 0.8113280265904282, "eval_f1_micro": 0.8493415488703637, "eval_loss": 0.10670817643404007, "eval_roc_auc": 0.9000055427080821, "eval_runtime": 659.1604, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.265, "learning_rate": 0.0001, "step": 28944 }, { "epoch": 54.1, "learning_rate": 0.0001, "loss": 0.1193, "step": 29000 }, { "epoch": 55.0, "eval_accuracy": 0.5727045373347625, "eval_f1_macro": 0.8116343899877551, "eval_f1_micro": 0.8490052609300356, "eval_loss": 0.10690104961395264, "eval_roc_auc": 0.8994929994035434, "eval_runtime": 663.4912, "eval_samples_per_second": 4.219, "eval_steps_per_second": 0.264, "learning_rate": 0.0001, "step": 29480 }, { "epoch": 55.04, "learning_rate": 0.0001, "loss": 0.1161, "step": 29500 }, { "epoch": 55.97, "learning_rate": 0.0001, "loss": 0.116, "step": 30000 }, { "epoch": 56.0, "eval_accuracy": 0.5744908896034298, "eval_f1_macro": 0.818624257330544, "eval_f1_micro": 0.8542707589816796, "eval_loss": 0.10558204352855682, "eval_roc_auc": 0.9077319963356549, "eval_runtime": 659.0048, "eval_samples_per_second": 4.247, "eval_steps_per_second": 0.266, "learning_rate": 0.0001, "step": 30016 }, { "epoch": 56.9, "learning_rate": 0.0001, "loss": 0.1147, "step": 30500 }, { "epoch": 57.0, "eval_accuracy": 0.5730618077884959, "eval_f1_macro": 0.8113712664273885, "eval_f1_micro": 0.8505116959064327, "eval_loss": 0.10627623647451401, "eval_roc_auc": 0.8980389880708319, "eval_runtime": 669.8276, "eval_samples_per_second": 4.179, "eval_steps_per_second": 0.261, "learning_rate": 0.0001, "step": 30552 }, { "epoch": 57.84, "learning_rate": 0.0001, "loss": 0.1139, "step": 31000 }, { "epoch": 58.0, "eval_accuracy": 0.5705609146123616, "eval_f1_macro": 0.8074486566040442, "eval_f1_micro": 0.8488632919066383, "eval_loss": 0.10657747834920883, "eval_roc_auc": 0.898615796069772, "eval_runtime": 654.902, "eval_samples_per_second": 4.274, "eval_steps_per_second": 0.267, "learning_rate": 0.0001, "step": 31088 }, { "epoch": 58.77, "learning_rate": 0.0001, "loss": 0.1143, "step": 31500 }, { "epoch": 59.0, "eval_accuracy": 0.5727045373347625, "eval_f1_macro": 0.8065194300593265, "eval_f1_micro": 0.8490646517579673, "eval_loss": 0.10738535225391388, "eval_roc_auc": 0.8971341466029096, "eval_runtime": 661.8186, "eval_samples_per_second": 4.229, "eval_steps_per_second": 0.264, "learning_rate": 0.0001, "step": 31624 }, { "epoch": 59.7, "learning_rate": 0.0001, "loss": 0.1148, "step": 32000 }, { "epoch": 60.0, "eval_accuracy": 0.5694891032511611, "eval_f1_macro": 0.8079607267977419, "eval_f1_micro": 0.8498935199269851, "eval_loss": 0.10777446627616882, "eval_roc_auc": 0.8980542827803957, "eval_runtime": 663.1043, "eval_samples_per_second": 4.221, "eval_steps_per_second": 0.264, "learning_rate": 0.0001, "step": 32160 }, { "epoch": 60.63, "learning_rate": 0.0001, "loss": 0.1143, "step": 32500 }, { "epoch": 61.0, "eval_accuracy": 0.572347266881029, "eval_f1_macro": 0.8159017507862669, "eval_f1_micro": 0.8512052195976559, "eval_loss": 0.10536229610443115, "eval_roc_auc": 0.9010383439575261, "eval_runtime": 660.7796, "eval_samples_per_second": 4.236, "eval_steps_per_second": 0.265, "learning_rate": 0.0001, "step": 32696 }, { "epoch": 61.57, "learning_rate": 0.0001, "loss": 0.1133, "step": 33000 }, { "epoch": 62.0, "eval_accuracy": 0.5737763486959628, "eval_f1_macro": 0.808341771577789, "eval_f1_micro": 0.8495672315006705, "eval_loss": 0.10581369698047638, "eval_roc_auc": 0.8973203339377094, "eval_runtime": 655.4703, "eval_samples_per_second": 4.27, "eval_steps_per_second": 0.267, "learning_rate": 0.0001, "step": 33232 }, { "epoch": 62.5, "learning_rate": 0.0001, "loss": 0.1134, "step": 33500 }, { "epoch": 63.0, "eval_accuracy": 0.5680600214362272, "eval_f1_macro": 0.8087952156757714, "eval_f1_micro": 0.847873368777187, "eval_loss": 0.1063385158777237, "eval_roc_auc": 0.899139005299654, "eval_runtime": 659.92, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.265, "learning_rate": 0.0001, "step": 33768 }, { "epoch": 63.43, "learning_rate": 0.0001, "loss": 0.1123, "step": 34000 }, { "epoch": 64.0, "eval_accuracy": 0.5702036441586281, "eval_f1_macro": 0.8121242156828758, "eval_f1_micro": 0.8503755754785559, "eval_loss": 0.10543316602706909, "eval_roc_auc": 0.8997137007632394, "eval_runtime": 647.0637, "eval_samples_per_second": 4.326, "eval_steps_per_second": 0.27, "learning_rate": 0.0001, "step": 34304 }, { "epoch": 64.37, "learning_rate": 0.0001, "loss": 0.1141, "step": 34500 }, { "epoch": 65.0, "eval_accuracy": 0.5730618077884959, "eval_f1_macro": 0.8099395952334975, "eval_f1_micro": 0.8494330240737371, "eval_loss": 0.10500979423522949, "eval_roc_auc": 0.8988602109578501, "eval_runtime": 649.7608, "eval_samples_per_second": 4.308, "eval_steps_per_second": 0.269, "learning_rate": 0.0001, "step": 34840 }, { "epoch": 65.3, "learning_rate": 0.0001, "loss": 0.1104, "step": 35000 }, { "epoch": 66.0, "eval_accuracy": 0.5762772418720972, "eval_f1_macro": 0.8132610218333434, "eval_f1_micro": 0.8506980430409072, "eval_loss": 0.10500740259885788, "eval_roc_auc": 0.8978957335797464, "eval_runtime": 652.9454, "eval_samples_per_second": 4.287, "eval_steps_per_second": 0.268, "learning_rate": 0.0001, "step": 35376 }, { "epoch": 66.23, "learning_rate": 0.0001, "loss": 0.1124, "step": 35500 }, { "epoch": 67.0, "eval_accuracy": 0.5669882100750268, "eval_f1_macro": 0.8163481281680508, "eval_f1_micro": 0.8512857399748246, "eval_loss": 0.10600199550390244, "eval_roc_auc": 0.9035619176711028, "eval_runtime": 657.7451, "eval_samples_per_second": 4.255, "eval_steps_per_second": 0.266, "learning_rate": 0.0001, "step": 35912 }, { "epoch": 67.16, "learning_rate": 0.0001, "loss": 0.1111, "step": 36000 }, { "epoch": 68.0, "eval_accuracy": 0.5680600214362272, "eval_f1_macro": 0.8156635542444352, "eval_f1_micro": 0.8511868899867453, "eval_loss": 0.10536548495292664, "eval_roc_auc": 0.9018856620506218, "eval_runtime": 656.6156, "eval_samples_per_second": 4.263, "eval_steps_per_second": 0.267, "learning_rate": 0.0001, "step": 36448 }, { "epoch": 68.1, "learning_rate": 0.0001, "loss": 0.1097, "step": 36500 }, { "epoch": 69.0, "eval_accuracy": 0.5673454805287603, "eval_f1_macro": 0.8109923908741982, "eval_f1_micro": 0.8500871237156763, "eval_loss": 0.1056470051407814, "eval_roc_auc": 0.9021192764421861, "eval_runtime": 657.5814, "eval_samples_per_second": 4.257, "eval_steps_per_second": 0.266, "learning_rate": 0.0001, "step": 36984 }, { "epoch": 69.03, "learning_rate": 0.0001, "loss": 0.1106, "step": 37000 }, { "epoch": 69.96, "learning_rate": 0.0001, "loss": 0.1096, "step": 37500 }, { "epoch": 70.0, "eval_accuracy": 0.5673454805287603, "eval_f1_macro": 0.8119494369118779, "eval_f1_micro": 0.8500514558992676, "eval_loss": 0.1059202253818512, "eval_roc_auc": 0.8997405001477684, "eval_runtime": 661.9686, "eval_samples_per_second": 4.228, "eval_steps_per_second": 0.264, "learning_rate": 0.0001, "step": 37520 }, { "epoch": 70.9, "learning_rate": 0.0001, "loss": 0.1097, "step": 38000 }, { "epoch": 71.0, "eval_accuracy": 0.5737763486959628, "eval_f1_macro": 0.8171777200998823, "eval_f1_micro": 0.8517297200071947, "eval_loss": 0.10546696186065674, "eval_roc_auc": 0.9037381432590107, "eval_runtime": 654.6273, "eval_samples_per_second": 4.276, "eval_steps_per_second": 0.267, "learning_rate": 0.0001, "step": 38056 }, { "epoch": 71.83, "learning_rate": 1e-05, "loss": 0.1084, "step": 38500 }, { "epoch": 72.0, "eval_accuracy": 0.5630582350839586, "eval_f1_macro": 0.8063147670569071, "eval_f1_micro": 0.84692696594993, "eval_loss": 0.10735420882701874, "eval_roc_auc": 0.8960515366190057, "eval_runtime": 658.0374, "eval_samples_per_second": 4.254, "eval_steps_per_second": 0.266, "learning_rate": 1e-05, "step": 38592 }, { "epoch": 72.76, "learning_rate": 1e-05, "loss": 0.1091, "step": 39000 }, { "epoch": 73.0, "eval_accuracy": 0.5734190782422294, "eval_f1_macro": 0.8171103344103756, "eval_f1_micro": 0.8525143029208069, "eval_loss": 0.10438621789216995, "eval_roc_auc": 0.9027705800247602, "eval_runtime": 664.8235, "eval_samples_per_second": 4.21, "eval_steps_per_second": 0.263, "learning_rate": 1e-05, "step": 39128 }, { "epoch": 73.69, "learning_rate": 1e-05, "loss": 0.1051, "step": 39500 }, { "epoch": 74.0, "eval_accuracy": 0.571632725973562, "eval_f1_macro": 0.8187356068324974, "eval_f1_micro": 0.8532934131736527, "eval_loss": 0.10408420860767365, "eval_roc_auc": 0.9050291111780234, "eval_runtime": 673.5967, "eval_samples_per_second": 4.155, "eval_steps_per_second": 0.26, "learning_rate": 1e-05, "step": 39664 }, { "epoch": 74.63, "learning_rate": 1e-05, "loss": 0.1069, "step": 40000 }, { "epoch": 75.0, "eval_accuracy": 0.5698463737048947, "eval_f1_macro": 0.8154818599661411, "eval_f1_micro": 0.8505788712011578, "eval_loss": 0.10555566847324371, "eval_roc_auc": 0.9012750442052373, "eval_runtime": 657.1771, "eval_samples_per_second": 4.259, "eval_steps_per_second": 0.266, "learning_rate": 1e-05, "step": 40200 }, { "epoch": 75.56, "learning_rate": 1e-05, "loss": 0.1079, "step": 40500 }, { "epoch": 76.0, "eval_accuracy": 0.572347266881029, "eval_f1_macro": 0.8153806167901401, "eval_f1_micro": 0.8517114840883114, "eval_loss": 0.10429207235574722, "eval_roc_auc": 0.9026594578808756, "eval_runtime": 664.6613, "eval_samples_per_second": 4.211, "eval_steps_per_second": 0.263, "learning_rate": 1e-05, "step": 40736 }, { "epoch": 76.49, "learning_rate": 1e-05, "loss": 0.1072, "step": 41000 }, { "epoch": 77.0, "eval_accuracy": 0.5780635941407646, "eval_f1_macro": 0.8187874152985335, "eval_f1_micro": 0.853607002716571, "eval_loss": 0.10398340970277786, "eval_roc_auc": 0.9026235354990767, "eval_runtime": 664.3977, "eval_samples_per_second": 4.213, "eval_steps_per_second": 0.263, "learning_rate": 1e-05, "step": 41272 }, { "epoch": 77.43, "learning_rate": 1e-05, "loss": 0.105, "step": 41500 }, { "epoch": 78.0, "eval_accuracy": 0.5712754555198285, "eval_f1_macro": 0.8150330415154026, "eval_f1_micro": 0.8514409743156879, "eval_loss": 0.10431113094091415, "eval_roc_auc": 0.9018006351128146, "eval_runtime": 660.4258, "eval_samples_per_second": 4.238, "eval_steps_per_second": 0.265, "learning_rate": 1e-05, "step": 41808 }, { "epoch": 78.36, "learning_rate": 1e-05, "loss": 0.1061, "step": 42000 }, { "epoch": 79.0, "eval_accuracy": 0.5734190782422294, "eval_f1_macro": 0.8180685346674, "eval_f1_micro": 0.8525934861278649, "eval_loss": 0.10427288711071014, "eval_roc_auc": 0.9023377306953573, "eval_runtime": 662.4252, "eval_samples_per_second": 4.225, "eval_steps_per_second": 0.264, "learning_rate": 1e-05, "step": 42344 }, { "epoch": 79.29, "learning_rate": 1e-05, "loss": 0.1045, "step": 42500 }, { "epoch": 80.0, "eval_accuracy": 0.5719899964272954, "eval_f1_macro": 0.8176554657248621, "eval_f1_micro": 0.8512023142288917, "eval_loss": 0.10507169365882874, "eval_roc_auc": 0.9017990921847413, "eval_runtime": 662.5216, "eval_samples_per_second": 4.225, "eval_steps_per_second": 0.264, "learning_rate": 1e-05, "step": 42880 }, { "epoch": 80.22, "learning_rate": 1e-05, "loss": 0.1062, "step": 43000 }, { "epoch": 81.0, "eval_accuracy": 0.5694891032511611, "eval_f1_macro": 0.8157488543073257, "eval_f1_micro": 0.8500577402297453, "eval_loss": 0.10686225444078445, "eval_roc_auc": 0.8984886750378719, "eval_runtime": 655.8239, "eval_samples_per_second": 4.268, "eval_steps_per_second": 0.267, "learning_rate": 1e-05, "step": 43416 }, { "epoch": 81.16, "learning_rate": 1e-05, "loss": 0.1057, "step": 43500 }, { "epoch": 82.0, "eval_accuracy": 0.5734190782422294, "eval_f1_macro": 0.8184447896335341, "eval_f1_micro": 0.8530242056579975, "eval_loss": 0.1039666160941124, "eval_roc_auc": 0.9039006170654286, "eval_runtime": 659.9314, "eval_samples_per_second": 4.241, "eval_steps_per_second": 0.265, "learning_rate": 1e-05, "step": 43952 }, { "epoch": 82.09, "learning_rate": 1e-05, "loss": 0.1073, "step": 44000 }, { "epoch": 83.0, "eval_accuracy": 0.5712754555198285, "eval_f1_macro": 0.8151349204991644, "eval_f1_micro": 0.8503600634688149, "eval_loss": 0.10477207601070404, "eval_roc_auc": 0.8973831902752079, "eval_runtime": 660.9853, "eval_samples_per_second": 4.235, "eval_steps_per_second": 0.265, "learning_rate": 1e-05, "step": 44488 }, { "epoch": 83.02, "learning_rate": 1e-05, "loss": 0.1053, "step": 44500 }, { "epoch": 83.96, "learning_rate": 1e-05, "loss": 0.1059, "step": 45000 }, { "epoch": 84.0, "eval_accuracy": 0.5741336191496963, "eval_f1_macro": 0.8177615611039802, "eval_f1_micro": 0.8525637163342773, "eval_loss": 0.1043851226568222, "eval_roc_auc": 0.9026457060993854, "eval_runtime": 661.1385, "eval_samples_per_second": 4.234, "eval_steps_per_second": 0.265, "learning_rate": 1e-05, "step": 45024 }, { "epoch": 84.89, "learning_rate": 1e-05, "loss": 0.1054, "step": 45500 }, { "epoch": 85.0, "eval_accuracy": 0.5702036441586281, "eval_f1_macro": 0.8148620661290662, "eval_f1_micro": 0.8505413415593056, "eval_loss": 0.10626183450222015, "eval_roc_auc": 0.9002829286929828, "eval_runtime": 667.6418, "eval_samples_per_second": 4.192, "eval_steps_per_second": 0.262, "learning_rate": 1e-05, "step": 45560 }, { "epoch": 85.82, "learning_rate": 1e-05, "loss": 0.1046, "step": 46000 }, { "epoch": 86.0, "eval_accuracy": 0.5727045373347625, "eval_f1_macro": 0.8160800717420196, "eval_f1_micro": 0.8525306469231703, "eval_loss": 0.10432148724794388, "eval_roc_auc": 0.9003534996708482, "eval_runtime": 656.1196, "eval_samples_per_second": 4.266, "eval_steps_per_second": 0.267, "learning_rate": 1e-05, "step": 46096 }, { "epoch": 86.75, "learning_rate": 1e-05, "loss": 0.105, "step": 46500 }, { "epoch": 87.0, "eval_accuracy": 0.5719899964272954, "eval_f1_macro": 0.8185429098530308, "eval_f1_micro": 0.8532148646214804, "eval_loss": 0.10474765300750732, "eval_roc_auc": 0.904161869591144, "eval_runtime": 662.4652, "eval_samples_per_second": 4.225, "eval_steps_per_second": 0.264, "learning_rate": 1e-05, "step": 46632 }, { "epoch": 87.69, "learning_rate": 1e-05, "loss": 0.1029, "step": 47000 }, { "epoch": 88.0, "eval_accuracy": 0.5759199714183637, "eval_f1_macro": 0.8155503181233792, "eval_f1_micro": 0.8518207954000958, "eval_loss": 0.10432733595371246, "eval_roc_auc": 0.9041143079632091, "eval_runtime": 659.661, "eval_samples_per_second": 4.243, "eval_steps_per_second": 0.265, "learning_rate": 1e-05, "step": 47168 }, { "epoch": 88.62, "learning_rate": 1.0000000000000002e-06, "loss": 0.1059, "step": 47500 }, { "epoch": 89.0, "eval_accuracy": 0.5787781350482315, "eval_f1_macro": 0.8178092875894164, "eval_f1_micro": 0.8538892570946557, "eval_loss": 0.10400809347629547, "eval_roc_auc": 0.9033957884012571, "eval_runtime": 659.0133, "eval_samples_per_second": 4.247, "eval_steps_per_second": 0.266, "learning_rate": 1.0000000000000002e-06, "step": 47704 }, { "epoch": 89.55, "learning_rate": 1.0000000000000002e-06, "loss": 0.1047, "step": 48000 }, { "epoch": 90.0, "eval_accuracy": 0.5694891032511611, "eval_f1_macro": 0.8136056007290207, "eval_f1_micro": 0.8504655833485485, "eval_loss": 0.10469033569097519, "eval_roc_auc": 0.8982986976684738, "eval_runtime": 653.5683, "eval_samples_per_second": 4.283, "eval_steps_per_second": 0.268, "learning_rate": 1.0000000000000002e-06, "step": 48240 }, { "epoch": 90.0, "learning_rate": 1.0000000000000002e-06, "step": 48240, "total_flos": 6.13340503159962e+19, "train_loss": 0.13765035268679188, "train_runtime": 238196.7751, "train_samples_per_second": 3.237, "train_steps_per_second": 0.203 } ], "logging_steps": 500, "max_steps": 48240, "num_train_epochs": 90, "save_steps": 500, "total_flos": 6.13340503159962e+19, "trial_name": null, "trial_params": null }