lombardata's picture
🍻 cheers
25c63a6
raw history blame
No virus
47.5 kB
{
"best_metric": 0.1039666160941124,
"best_model_checkpoint": "./dino-base-2023_11_27-with_custom_head/checkpoint-43952",
"epoch": 90.0,
"eval_steps": 500,
"global_step": 48240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.93,
"learning_rate": 0.01,
"loss": 0.2471,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.39442658092175775,
"eval_f1_macro": 0.40064106475011957,
"eval_f1_micro": 0.5684482898035094,
"eval_loss": 0.2136440873146057,
"eval_roc_auc": 0.7020299868465498,
"eval_runtime": 648.3221,
"eval_samples_per_second": 4.317,
"eval_steps_per_second": 0.27,
"learning_rate": 0.01,
"step": 536
},
{
"epoch": 1.87,
"learning_rate": 0.01,
"loss": 0.2208,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.4090746695248303,
"eval_f1_macro": 0.646218183693934,
"eval_f1_micro": 0.6908665105386417,
"eval_loss": 0.21994435787200928,
"eval_roc_auc": 0.7944947312416977,
"eval_runtime": 649.3168,
"eval_samples_per_second": 4.311,
"eval_steps_per_second": 0.27,
"learning_rate": 0.01,
"step": 1072
},
{
"epoch": 2.8,
"learning_rate": 0.01,
"loss": 0.2181,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.43122543765630583,
"eval_f1_macro": 0.5770940809584216,
"eval_f1_micro": 0.6712328767123288,
"eval_loss": 0.19793672859668732,
"eval_roc_auc": 0.7666801441240528,
"eval_runtime": 645.3042,
"eval_samples_per_second": 4.337,
"eval_steps_per_second": 0.271,
"learning_rate": 0.01,
"step": 1608
},
{
"epoch": 3.73,
"learning_rate": 0.01,
"loss": 0.2187,
"step": 2000
},
{
"epoch": 4.0,
"eval_accuracy": 0.4137191854233655,
"eval_f1_macro": 0.606113442290164,
"eval_f1_micro": 0.7318466985527052,
"eval_loss": 0.1766517609357834,
"eval_roc_auc": 0.8268705246501159,
"eval_runtime": 668.3118,
"eval_samples_per_second": 4.188,
"eval_steps_per_second": 0.262,
"learning_rate": 0.01,
"step": 2144
},
{
"epoch": 4.66,
"learning_rate": 0.01,
"loss": 0.2128,
"step": 2500
},
{
"epoch": 5.0,
"eval_accuracy": 0.43265451947123973,
"eval_f1_macro": 0.5526692766303807,
"eval_f1_micro": 0.7173007124613524,
"eval_loss": 0.17600227892398834,
"eval_roc_auc": 0.8011887560107374,
"eval_runtime": 647.0188,
"eval_samples_per_second": 4.326,
"eval_steps_per_second": 0.27,
"learning_rate": 0.01,
"step": 2680
},
{
"epoch": 5.6,
"learning_rate": 0.01,
"loss": 0.2171,
"step": 3000
},
{
"epoch": 6.0,
"eval_accuracy": 0.4226509467667024,
"eval_f1_macro": 0.5183527283365527,
"eval_f1_micro": 0.7108839135498899,
"eval_loss": 0.1992170512676239,
"eval_roc_auc": 0.8061565099276957,
"eval_runtime": 650.6673,
"eval_samples_per_second": 4.302,
"eval_steps_per_second": 0.269,
"learning_rate": 0.01,
"step": 3216
},
{
"epoch": 6.53,
"learning_rate": 0.01,
"loss": 0.2108,
"step": 3500
},
{
"epoch": 7.0,
"eval_accuracy": 0.41729188996070027,
"eval_f1_macro": 0.6054999995580052,
"eval_f1_micro": 0.7338740337092892,
"eval_loss": 0.16948619484901428,
"eval_roc_auc": 0.8232094683978447,
"eval_runtime": 649.3312,
"eval_samples_per_second": 4.311,
"eval_steps_per_second": 0.27,
"learning_rate": 0.01,
"step": 3752
},
{
"epoch": 7.46,
"learning_rate": 0.01,
"loss": 0.2147,
"step": 4000
},
{
"epoch": 8.0,
"eval_accuracy": 0.4226509467667024,
"eval_f1_macro": 0.6176021212641017,
"eval_f1_micro": 0.7440822902466228,
"eval_loss": 0.16185873746871948,
"eval_roc_auc": 0.8341874074405569,
"eval_runtime": 663.7112,
"eval_samples_per_second": 4.217,
"eval_steps_per_second": 0.264,
"learning_rate": 0.01,
"step": 4288
},
{
"epoch": 8.4,
"learning_rate": 0.01,
"loss": 0.2112,
"step": 4500
},
{
"epoch": 9.0,
"eval_accuracy": 0.43086816720257237,
"eval_f1_macro": 0.6402287128969387,
"eval_f1_micro": 0.7336987336987336,
"eval_loss": 0.17079614102840424,
"eval_roc_auc": 0.8244850070361233,
"eval_runtime": 656.7931,
"eval_samples_per_second": 4.262,
"eval_steps_per_second": 0.266,
"learning_rate": 0.01,
"step": 4824
},
{
"epoch": 9.33,
"learning_rate": 0.01,
"loss": 0.216,
"step": 5000
},
{
"epoch": 10.0,
"eval_accuracy": 0.42836727402643804,
"eval_f1_macro": 0.643393584513474,
"eval_f1_micro": 0.7493341591824095,
"eval_loss": 0.1750514954328537,
"eval_roc_auc": 0.83718619371997,
"eval_runtime": 657.1366,
"eval_samples_per_second": 4.259,
"eval_steps_per_second": 0.266,
"learning_rate": 0.01,
"step": 5360
},
{
"epoch": 10.26,
"learning_rate": 0.01,
"loss": 0.2151,
"step": 5500
},
{
"epoch": 11.0,
"eval_accuracy": 0.43372633083244017,
"eval_f1_macro": 0.6292914445814428,
"eval_f1_micro": 0.7241767706883986,
"eval_loss": 0.17014054954051971,
"eval_roc_auc": 0.8078434001509203,
"eval_runtime": 662.7207,
"eval_samples_per_second": 4.223,
"eval_steps_per_second": 0.264,
"learning_rate": 0.01,
"step": 5896
},
{
"epoch": 11.19,
"learning_rate": 0.01,
"loss": 0.213,
"step": 6000
},
{
"epoch": 12.0,
"eval_accuracy": 0.3565559128260093,
"eval_f1_macro": 0.5371100777543761,
"eval_f1_micro": 0.6629161350191394,
"eval_loss": 0.2409001588821411,
"eval_roc_auc": 0.8034829311226885,
"eval_runtime": 657.3534,
"eval_samples_per_second": 4.258,
"eval_steps_per_second": 0.266,
"learning_rate": 0.01,
"step": 6432
},
{
"epoch": 12.13,
"learning_rate": 0.01,
"loss": 0.2161,
"step": 6500
},
{
"epoch": 13.0,
"eval_accuracy": 0.4194355126831011,
"eval_f1_macro": 0.6243034191518838,
"eval_f1_micro": 0.7290690310322989,
"eval_loss": 0.1704823523759842,
"eval_roc_auc": 0.820743148916754,
"eval_runtime": 665.336,
"eval_samples_per_second": 4.207,
"eval_steps_per_second": 0.263,
"learning_rate": 0.01,
"step": 6968
},
{
"epoch": 13.06,
"learning_rate": 0.01,
"loss": 0.2145,
"step": 7000
},
{
"epoch": 13.99,
"learning_rate": 0.01,
"loss": 0.2136,
"step": 7500
},
{
"epoch": 14.0,
"eval_accuracy": 0.44265809217577706,
"eval_f1_macro": 0.6215367292632343,
"eval_f1_micro": 0.734259379462547,
"eval_loss": 0.16933664679527283,
"eval_roc_auc": 0.8177495297062425,
"eval_runtime": 654.4339,
"eval_samples_per_second": 4.277,
"eval_steps_per_second": 0.267,
"learning_rate": 0.01,
"step": 7504
},
{
"epoch": 14.93,
"learning_rate": 0.001,
"loss": 0.1826,
"step": 8000
},
{
"epoch": 15.0,
"eval_accuracy": 0.4969632011432655,
"eval_f1_macro": 0.7459554779815883,
"eval_f1_micro": 0.7967720076387605,
"eval_loss": 0.13880470395088196,
"eval_roc_auc": 0.8648126874646008,
"eval_runtime": 668.3546,
"eval_samples_per_second": 4.188,
"eval_steps_per_second": 0.262,
"learning_rate": 0.001,
"step": 8040
},
{
"epoch": 15.86,
"learning_rate": 0.001,
"loss": 0.1731,
"step": 8500
},
{
"epoch": 16.0,
"eval_accuracy": 0.5155412647374062,
"eval_f1_macro": 0.7385190479208986,
"eval_f1_micro": 0.8016270337922403,
"eval_loss": 0.14762958884239197,
"eval_roc_auc": 0.8631052728177114,
"eval_runtime": 657.5159,
"eval_samples_per_second": 4.257,
"eval_steps_per_second": 0.266,
"learning_rate": 0.001,
"step": 8576
},
{
"epoch": 16.79,
"learning_rate": 0.001,
"loss": 0.1649,
"step": 9000
},
{
"epoch": 17.0,
"eval_accuracy": 0.5023222579492675,
"eval_f1_macro": 0.7692855132157861,
"eval_f1_micro": 0.813318908522659,
"eval_loss": 0.1351146697998047,
"eval_roc_auc": 0.8811667807242409,
"eval_runtime": 655.4487,
"eval_samples_per_second": 4.27,
"eval_steps_per_second": 0.267,
"learning_rate": 0.001,
"step": 9112
},
{
"epoch": 17.72,
"learning_rate": 0.001,
"loss": 0.1624,
"step": 9500
},
{
"epoch": 18.0,
"eval_accuracy": 0.5212575919971418,
"eval_f1_macro": 0.7714136364578609,
"eval_f1_micro": 0.8185593067340675,
"eval_loss": 0.13848340511322021,
"eval_roc_auc": 0.8837626029149266,
"eval_runtime": 662.3967,
"eval_samples_per_second": 4.226,
"eval_steps_per_second": 0.264,
"learning_rate": 0.001,
"step": 9648
},
{
"epoch": 18.66,
"learning_rate": 0.001,
"loss": 0.1576,
"step": 10000
},
{
"epoch": 19.0,
"eval_accuracy": 0.5205430510896749,
"eval_f1_macro": 0.7631806100991755,
"eval_f1_micro": 0.8175969609705288,
"eval_loss": 0.13018544018268585,
"eval_roc_auc": 0.8779148602729905,
"eval_runtime": 679.9157,
"eval_samples_per_second": 4.117,
"eval_steps_per_second": 0.257,
"learning_rate": 0.001,
"step": 10184
},
{
"epoch": 19.59,
"learning_rate": 0.001,
"loss": 0.1544,
"step": 10500
},
{
"epoch": 20.0,
"eval_accuracy": 0.5319757056091461,
"eval_f1_macro": 0.7721701876863485,
"eval_f1_micro": 0.8229907814143413,
"eval_loss": 0.12343110144138336,
"eval_roc_auc": 0.8780568409783432,
"eval_runtime": 658.9399,
"eval_samples_per_second": 4.248,
"eval_steps_per_second": 0.266,
"learning_rate": 0.001,
"step": 10720
},
{
"epoch": 20.52,
"learning_rate": 0.001,
"loss": 0.1542,
"step": 11000
},
{
"epoch": 21.0,
"eval_accuracy": 0.525902107895677,
"eval_f1_macro": 0.7754587726434372,
"eval_f1_micro": 0.8267645466032855,
"eval_loss": 0.13044790923595428,
"eval_roc_auc": 0.8884477411328827,
"eval_runtime": 659.4337,
"eval_samples_per_second": 4.245,
"eval_steps_per_second": 0.265,
"learning_rate": 0.001,
"step": 11256
},
{
"epoch": 21.46,
"learning_rate": 0.001,
"loss": 0.1525,
"step": 11500
},
{
"epoch": 22.0,
"eval_accuracy": 0.5376920328688818,
"eval_f1_macro": 0.76461964189607,
"eval_f1_micro": 0.8176823176823177,
"eval_loss": 0.12196117639541626,
"eval_roc_auc": 0.8724977198539325,
"eval_runtime": 674.7285,
"eval_samples_per_second": 4.148,
"eval_steps_per_second": 0.259,
"learning_rate": 0.001,
"step": 11792
},
{
"epoch": 22.39,
"learning_rate": 0.001,
"loss": 0.1505,
"step": 12000
},
{
"epoch": 23.0,
"eval_accuracy": 0.5255448374419436,
"eval_f1_macro": 0.7872363331907107,
"eval_f1_micro": 0.8264049955396968,
"eval_loss": 0.13109837472438812,
"eval_roc_auc": 0.891814004155335,
"eval_runtime": 658.8901,
"eval_samples_per_second": 4.248,
"eval_steps_per_second": 0.266,
"learning_rate": 0.001,
"step": 12328
},
{
"epoch": 23.32,
"learning_rate": 0.001,
"loss": 0.1515,
"step": 12500
},
{
"epoch": 24.0,
"eval_accuracy": 0.5316184351554126,
"eval_f1_macro": 0.7629977302664256,
"eval_f1_micro": 0.8188446438586652,
"eval_loss": 0.12468679994344711,
"eval_roc_auc": 0.8736890703040258,
"eval_runtime": 649.926,
"eval_samples_per_second": 4.307,
"eval_steps_per_second": 0.269,
"learning_rate": 0.001,
"step": 12864
},
{
"epoch": 24.25,
"learning_rate": 0.001,
"loss": 0.1471,
"step": 13000
},
{
"epoch": 25.0,
"eval_accuracy": 0.5226866738120758,
"eval_f1_macro": 0.7726394339166129,
"eval_f1_micro": 0.8258118937157316,
"eval_loss": 0.1264602392911911,
"eval_roc_auc": 0.8875076814616129,
"eval_runtime": 661.9223,
"eval_samples_per_second": 4.229,
"eval_steps_per_second": 0.264,
"learning_rate": 0.001,
"step": 13400
},
{
"epoch": 25.19,
"learning_rate": 0.001,
"loss": 0.1475,
"step": 13500
},
{
"epoch": 26.0,
"eval_accuracy": 0.5330475169703466,
"eval_f1_macro": 0.7834057788273457,
"eval_f1_micro": 0.8301059554256485,
"eval_loss": 0.12767212092876434,
"eval_roc_auc": 0.8867279830614062,
"eval_runtime": 654.8587,
"eval_samples_per_second": 4.274,
"eval_steps_per_second": 0.267,
"learning_rate": 0.001,
"step": 13936
},
{
"epoch": 26.12,
"learning_rate": 0.001,
"loss": 0.1484,
"step": 14000
},
{
"epoch": 27.0,
"eval_accuracy": 0.5398356555912825,
"eval_f1_macro": 0.7661233818608114,
"eval_f1_micro": 0.8218290555693994,
"eval_loss": 0.12360195070505142,
"eval_roc_auc": 0.8754121833739898,
"eval_runtime": 658.0747,
"eval_samples_per_second": 4.253,
"eval_steps_per_second": 0.266,
"learning_rate": 0.001,
"step": 14472
},
{
"epoch": 27.05,
"learning_rate": 0.001,
"loss": 0.1475,
"step": 14500
},
{
"epoch": 27.99,
"learning_rate": 0.001,
"loss": 0.1472,
"step": 15000
},
{
"epoch": 28.0,
"eval_accuracy": 0.5401929260450161,
"eval_f1_macro": 0.7728524137767288,
"eval_f1_micro": 0.8228913409388442,
"eval_loss": 0.1256585270166397,
"eval_roc_auc": 0.8757945197904818,
"eval_runtime": 655.1438,
"eval_samples_per_second": 4.272,
"eval_steps_per_second": 0.267,
"learning_rate": 0.001,
"step": 15008
},
{
"epoch": 28.92,
"learning_rate": 0.0001,
"loss": 0.1379,
"step": 15500
},
{
"epoch": 29.0,
"eval_accuracy": 0.5451947123972848,
"eval_f1_macro": 0.7891009081675242,
"eval_f1_micro": 0.8352094482376824,
"eval_loss": 0.11994459480047226,
"eval_roc_auc": 0.8865305601939816,
"eval_runtime": 657.6068,
"eval_samples_per_second": 4.256,
"eval_steps_per_second": 0.266,
"learning_rate": 0.0001,
"step": 15544
},
{
"epoch": 29.85,
"learning_rate": 0.0001,
"loss": 0.1349,
"step": 16000
},
{
"epoch": 30.0,
"eval_accuracy": 0.5487674169346195,
"eval_f1_macro": 0.7943922901606704,
"eval_f1_micro": 0.841315916787615,
"eval_loss": 0.11564121395349503,
"eval_roc_auc": 0.895138845939546,
"eval_runtime": 659.4245,
"eval_samples_per_second": 4.245,
"eval_steps_per_second": 0.265,
"learning_rate": 0.0001,
"step": 16080
},
{
"epoch": 30.78,
"learning_rate": 0.0001,
"loss": 0.1326,
"step": 16500
},
{
"epoch": 31.0,
"eval_accuracy": 0.5498392282958199,
"eval_f1_macro": 0.7983364672353336,
"eval_f1_micro": 0.84037558685446,
"eval_loss": 0.11515345424413681,
"eval_roc_auc": 0.8960665621862288,
"eval_runtime": 652.6109,
"eval_samples_per_second": 4.289,
"eval_steps_per_second": 0.268,
"learning_rate": 0.0001,
"step": 16616
},
{
"epoch": 31.72,
"learning_rate": 0.0001,
"loss": 0.1321,
"step": 17000
},
{
"epoch": 32.0,
"eval_accuracy": 0.5509110396570204,
"eval_f1_macro": 0.7911364775051637,
"eval_f1_micro": 0.8385913426265589,
"eval_loss": 0.11371538788080215,
"eval_roc_auc": 0.89024940896342,
"eval_runtime": 663.8067,
"eval_samples_per_second": 4.217,
"eval_steps_per_second": 0.264,
"learning_rate": 0.0001,
"step": 17152
},
{
"epoch": 32.65,
"learning_rate": 0.0001,
"loss": 0.1294,
"step": 17500
},
{
"epoch": 33.0,
"eval_accuracy": 0.5512683101107538,
"eval_f1_macro": 0.7924466456431516,
"eval_f1_micro": 0.8406133545115768,
"eval_loss": 0.11363548040390015,
"eval_roc_auc": 0.891620070857568,
"eval_runtime": 641.5792,
"eval_samples_per_second": 4.363,
"eval_steps_per_second": 0.273,
"learning_rate": 0.0001,
"step": 17688
},
{
"epoch": 33.58,
"learning_rate": 0.0001,
"loss": 0.1297,
"step": 18000
},
{
"epoch": 34.0,
"eval_accuracy": 0.5551982851018221,
"eval_f1_macro": 0.7994952163993189,
"eval_f1_micro": 0.8438879816125325,
"eval_loss": 0.1100151389837265,
"eval_roc_auc": 0.8965325171836247,
"eval_runtime": 666.5011,
"eval_samples_per_second": 4.2,
"eval_steps_per_second": 0.263,
"learning_rate": 0.0001,
"step": 18224
},
{
"epoch": 34.51,
"learning_rate": 0.0001,
"loss": 0.1296,
"step": 18500
},
{
"epoch": 35.0,
"eval_accuracy": 0.5569846373704894,
"eval_f1_macro": 0.7958547104209327,
"eval_f1_micro": 0.8430563978168587,
"eval_loss": 0.11022897809743881,
"eval_roc_auc": 0.8952561397957243,
"eval_runtime": 651.2057,
"eval_samples_per_second": 4.298,
"eval_steps_per_second": 0.269,
"learning_rate": 0.0001,
"step": 18760
},
{
"epoch": 35.45,
"learning_rate": 0.0001,
"loss": 0.1276,
"step": 19000
},
{
"epoch": 36.0,
"eval_accuracy": 0.5584137191854234,
"eval_f1_macro": 0.7954026209465306,
"eval_f1_micro": 0.8428501708150318,
"eval_loss": 0.11038191616535187,
"eval_roc_auc": 0.8933002510838177,
"eval_runtime": 659.4359,
"eval_samples_per_second": 4.245,
"eval_steps_per_second": 0.265,
"learning_rate": 0.0001,
"step": 19296
},
{
"epoch": 36.38,
"learning_rate": 0.0001,
"loss": 0.1264,
"step": 19500
},
{
"epoch": 37.0,
"eval_accuracy": 0.5644873168988924,
"eval_f1_macro": 0.8073166114826349,
"eval_f1_micro": 0.8467775842392937,
"eval_loss": 0.11108729988336563,
"eval_roc_auc": 0.9003547734565809,
"eval_runtime": 649.1518,
"eval_samples_per_second": 4.312,
"eval_steps_per_second": 0.27,
"learning_rate": 0.0001,
"step": 19832
},
{
"epoch": 37.31,
"learning_rate": 0.0001,
"loss": 0.1279,
"step": 20000
},
{
"epoch": 38.0,
"eval_accuracy": 0.5662736691675598,
"eval_f1_macro": 0.8059718169069215,
"eval_f1_micro": 0.8457475869604808,
"eval_loss": 0.11050034314393997,
"eval_roc_auc": 0.8964483289954395,
"eval_runtime": 661.6908,
"eval_samples_per_second": 4.23,
"eval_steps_per_second": 0.264,
"learning_rate": 0.0001,
"step": 20368
},
{
"epoch": 38.25,
"learning_rate": 0.0001,
"loss": 0.1231,
"step": 20500
},
{
"epoch": 39.0,
"eval_accuracy": 0.5623436941764915,
"eval_f1_macro": 0.810452280144873,
"eval_f1_micro": 0.8481141692150868,
"eval_loss": 0.1114969253540039,
"eval_roc_auc": 0.9016441984475202,
"eval_runtime": 645.0365,
"eval_samples_per_second": 4.339,
"eval_steps_per_second": 0.271,
"learning_rate": 0.0001,
"step": 20904
},
{
"epoch": 39.18,
"learning_rate": 0.0001,
"loss": 0.1276,
"step": 21000
},
{
"epoch": 40.0,
"eval_accuracy": 0.564844587352626,
"eval_f1_macro": 0.7998757908744976,
"eval_f1_micro": 0.8442948914040991,
"eval_loss": 0.10886894911527634,
"eval_roc_auc": 0.8932229387863774,
"eval_runtime": 664.3024,
"eval_samples_per_second": 4.213,
"eval_steps_per_second": 0.263,
"learning_rate": 0.0001,
"step": 21440
},
{
"epoch": 40.11,
"learning_rate": 0.0001,
"loss": 0.121,
"step": 21500
},
{
"epoch": 41.0,
"eval_accuracy": 0.5627009646302251,
"eval_f1_macro": 0.8014681171854418,
"eval_f1_micro": 0.8454512239678481,
"eval_loss": 0.10981705039739609,
"eval_roc_auc": 0.8953198348828445,
"eval_runtime": 652.9462,
"eval_samples_per_second": 4.287,
"eval_steps_per_second": 0.268,
"learning_rate": 0.0001,
"step": 21976
},
{
"epoch": 41.04,
"learning_rate": 0.0001,
"loss": 0.1241,
"step": 22000
},
{
"epoch": 41.98,
"learning_rate": 0.0001,
"loss": 0.1229,
"step": 22500
},
{
"epoch": 42.0,
"eval_accuracy": 0.5619864237227581,
"eval_f1_macro": 0.8009464266100834,
"eval_f1_micro": 0.8459390554813646,
"eval_loss": 0.10872387140989304,
"eval_roc_auc": 0.8965747458488879,
"eval_runtime": 655.0026,
"eval_samples_per_second": 4.273,
"eval_steps_per_second": 0.267,
"learning_rate": 0.0001,
"step": 22512
},
{
"epoch": 42.91,
"learning_rate": 0.0001,
"loss": 0.1227,
"step": 23000
},
{
"epoch": 43.0,
"eval_accuracy": 0.5687745623436942,
"eval_f1_macro": 0.8067169938735436,
"eval_f1_micro": 0.8468292682926829,
"eval_loss": 0.10882638394832611,
"eval_roc_auc": 0.8956753719148074,
"eval_runtime": 660.8489,
"eval_samples_per_second": 4.235,
"eval_steps_per_second": 0.265,
"learning_rate": 0.0001,
"step": 23048
},
{
"epoch": 43.84,
"learning_rate": 0.0001,
"loss": 0.1221,
"step": 23500
},
{
"epoch": 44.0,
"eval_accuracy": 0.5673454805287603,
"eval_f1_macro": 0.8065744332804647,
"eval_f1_micro": 0.8476346632659257,
"eval_loss": 0.10762665420770645,
"eval_roc_auc": 0.897394560379003,
"eval_runtime": 657.9808,
"eval_samples_per_second": 4.254,
"eval_steps_per_second": 0.266,
"learning_rate": 0.0001,
"step": 23584
},
{
"epoch": 44.78,
"learning_rate": 0.0001,
"loss": 0.1191,
"step": 24000
},
{
"epoch": 45.0,
"eval_accuracy": 0.5698463737048947,
"eval_f1_macro": 0.817339787108748,
"eval_f1_micro": 0.8507955568898229,
"eval_loss": 0.10689569264650345,
"eval_roc_auc": 0.9027498177813545,
"eval_runtime": 671.4032,
"eval_samples_per_second": 4.169,
"eval_steps_per_second": 0.261,
"learning_rate": 0.0001,
"step": 24120
},
{
"epoch": 45.71,
"learning_rate": 0.0001,
"loss": 0.1212,
"step": 24500
},
{
"epoch": 46.0,
"eval_accuracy": 0.563415505537692,
"eval_f1_macro": 0.8173548628519673,
"eval_f1_micro": 0.8508932256352808,
"eval_loss": 0.10721632838249207,
"eval_roc_auc": 0.9086343215547117,
"eval_runtime": 661.9534,
"eval_samples_per_second": 4.228,
"eval_steps_per_second": 0.264,
"learning_rate": 0.0001,
"step": 24656
},
{
"epoch": 46.64,
"learning_rate": 0.0001,
"loss": 0.1198,
"step": 25000
},
{
"epoch": 47.0,
"eval_accuracy": 0.5687745623436942,
"eval_f1_macro": 0.8090465345914333,
"eval_f1_micro": 0.849293563579278,
"eval_loss": 0.10663535445928574,
"eval_roc_auc": 0.900130416633457,
"eval_runtime": 673.8512,
"eval_samples_per_second": 4.154,
"eval_steps_per_second": 0.26,
"learning_rate": 0.0001,
"step": 25192
},
{
"epoch": 47.57,
"learning_rate": 0.0001,
"loss": 0.1201,
"step": 25500
},
{
"epoch": 48.0,
"eval_accuracy": 0.5652018578063595,
"eval_f1_macro": 0.8082582901487069,
"eval_f1_micro": 0.8484811957569913,
"eval_loss": 0.10762892663478851,
"eval_roc_auc": 0.9002407000277199,
"eval_runtime": 662.4725,
"eval_samples_per_second": 4.225,
"eval_steps_per_second": 0.264,
"learning_rate": 0.0001,
"step": 25728
},
{
"epoch": 48.51,
"learning_rate": 0.0001,
"loss": 0.1189,
"step": 26000
},
{
"epoch": 49.0,
"eval_accuracy": 0.5687745623436942,
"eval_f1_macro": 0.8152048365492516,
"eval_f1_micro": 0.8507597141312836,
"eval_loss": 0.10654111951589584,
"eval_roc_auc": 0.9026532861685821,
"eval_runtime": 679.062,
"eval_samples_per_second": 4.122,
"eval_steps_per_second": 0.258,
"learning_rate": 0.0001,
"step": 26264
},
{
"epoch": 49.44,
"learning_rate": 0.0001,
"loss": 0.1176,
"step": 26500
},
{
"epoch": 50.0,
"eval_accuracy": 0.563415505537692,
"eval_f1_macro": 0.8033757987633436,
"eval_f1_micro": 0.8462940461725396,
"eval_loss": 0.10730718821287155,
"eval_roc_auc": 0.8965096424048645,
"eval_runtime": 643.0995,
"eval_samples_per_second": 4.352,
"eval_steps_per_second": 0.272,
"learning_rate": 0.0001,
"step": 26800
},
{
"epoch": 50.37,
"learning_rate": 0.0001,
"loss": 0.1202,
"step": 27000
},
{
"epoch": 51.0,
"eval_accuracy": 0.5727045373347625,
"eval_f1_macro": 0.8102272806912906,
"eval_f1_micro": 0.8480676328502416,
"eval_loss": 0.10732194036245346,
"eval_roc_auc": 0.8994002578253693,
"eval_runtime": 645.0782,
"eval_samples_per_second": 4.339,
"eval_steps_per_second": 0.271,
"learning_rate": 0.0001,
"step": 27336
},
{
"epoch": 51.31,
"learning_rate": 0.0001,
"loss": 0.1167,
"step": 27500
},
{
"epoch": 52.0,
"eval_accuracy": 0.571632725973562,
"eval_f1_macro": 0.8179079350328715,
"eval_f1_micro": 0.8521687462863933,
"eval_loss": 0.10600127279758453,
"eval_roc_auc": 0.9068707918898985,
"eval_runtime": 661.3351,
"eval_samples_per_second": 4.232,
"eval_steps_per_second": 0.265,
"learning_rate": 0.0001,
"step": 27872
},
{
"epoch": 52.24,
"learning_rate": 0.0001,
"loss": 0.1192,
"step": 28000
},
{
"epoch": 53.0,
"eval_accuracy": 0.5712754555198285,
"eval_f1_macro": 0.8127939311574505,
"eval_f1_micro": 0.8507093268940539,
"eval_loss": 0.10628383606672287,
"eval_roc_auc": 0.9009869922949931,
"eval_runtime": 649.2151,
"eval_samples_per_second": 4.311,
"eval_steps_per_second": 0.27,
"learning_rate": 0.0001,
"step": 28408
},
{
"epoch": 53.17,
"learning_rate": 0.0001,
"loss": 0.1156,
"step": 28500
},
{
"epoch": 54.0,
"eval_accuracy": 0.5719899964272954,
"eval_f1_macro": 0.8113280265904282,
"eval_f1_micro": 0.8493415488703637,
"eval_loss": 0.10670817643404007,
"eval_roc_auc": 0.9000055427080821,
"eval_runtime": 659.1604,
"eval_samples_per_second": 4.246,
"eval_steps_per_second": 0.265,
"learning_rate": 0.0001,
"step": 28944
},
{
"epoch": 54.1,
"learning_rate": 0.0001,
"loss": 0.1193,
"step": 29000
},
{
"epoch": 55.0,
"eval_accuracy": 0.5727045373347625,
"eval_f1_macro": 0.8116343899877551,
"eval_f1_micro": 0.8490052609300356,
"eval_loss": 0.10690104961395264,
"eval_roc_auc": 0.8994929994035434,
"eval_runtime": 663.4912,
"eval_samples_per_second": 4.219,
"eval_steps_per_second": 0.264,
"learning_rate": 0.0001,
"step": 29480
},
{
"epoch": 55.04,
"learning_rate": 0.0001,
"loss": 0.1161,
"step": 29500
},
{
"epoch": 55.97,
"learning_rate": 0.0001,
"loss": 0.116,
"step": 30000
},
{
"epoch": 56.0,
"eval_accuracy": 0.5744908896034298,
"eval_f1_macro": 0.818624257330544,
"eval_f1_micro": 0.8542707589816796,
"eval_loss": 0.10558204352855682,
"eval_roc_auc": 0.9077319963356549,
"eval_runtime": 659.0048,
"eval_samples_per_second": 4.247,
"eval_steps_per_second": 0.266,
"learning_rate": 0.0001,
"step": 30016
},
{
"epoch": 56.9,
"learning_rate": 0.0001,
"loss": 0.1147,
"step": 30500
},
{
"epoch": 57.0,
"eval_accuracy": 0.5730618077884959,
"eval_f1_macro": 0.8113712664273885,
"eval_f1_micro": 0.8505116959064327,
"eval_loss": 0.10627623647451401,
"eval_roc_auc": 0.8980389880708319,
"eval_runtime": 669.8276,
"eval_samples_per_second": 4.179,
"eval_steps_per_second": 0.261,
"learning_rate": 0.0001,
"step": 30552
},
{
"epoch": 57.84,
"learning_rate": 0.0001,
"loss": 0.1139,
"step": 31000
},
{
"epoch": 58.0,
"eval_accuracy": 0.5705609146123616,
"eval_f1_macro": 0.8074486566040442,
"eval_f1_micro": 0.8488632919066383,
"eval_loss": 0.10657747834920883,
"eval_roc_auc": 0.898615796069772,
"eval_runtime": 654.902,
"eval_samples_per_second": 4.274,
"eval_steps_per_second": 0.267,
"learning_rate": 0.0001,
"step": 31088
},
{
"epoch": 58.77,
"learning_rate": 0.0001,
"loss": 0.1143,
"step": 31500
},
{
"epoch": 59.0,
"eval_accuracy": 0.5727045373347625,
"eval_f1_macro": 0.8065194300593265,
"eval_f1_micro": 0.8490646517579673,
"eval_loss": 0.10738535225391388,
"eval_roc_auc": 0.8971341466029096,
"eval_runtime": 661.8186,
"eval_samples_per_second": 4.229,
"eval_steps_per_second": 0.264,
"learning_rate": 0.0001,
"step": 31624
},
{
"epoch": 59.7,
"learning_rate": 0.0001,
"loss": 0.1148,
"step": 32000
},
{
"epoch": 60.0,
"eval_accuracy": 0.5694891032511611,
"eval_f1_macro": 0.8079607267977419,
"eval_f1_micro": 0.8498935199269851,
"eval_loss": 0.10777446627616882,
"eval_roc_auc": 0.8980542827803957,
"eval_runtime": 663.1043,
"eval_samples_per_second": 4.221,
"eval_steps_per_second": 0.264,
"learning_rate": 0.0001,
"step": 32160
},
{
"epoch": 60.63,
"learning_rate": 0.0001,
"loss": 0.1143,
"step": 32500
},
{
"epoch": 61.0,
"eval_accuracy": 0.572347266881029,
"eval_f1_macro": 0.8159017507862669,
"eval_f1_micro": 0.8512052195976559,
"eval_loss": 0.10536229610443115,
"eval_roc_auc": 0.9010383439575261,
"eval_runtime": 660.7796,
"eval_samples_per_second": 4.236,
"eval_steps_per_second": 0.265,
"learning_rate": 0.0001,
"step": 32696
},
{
"epoch": 61.57,
"learning_rate": 0.0001,
"loss": 0.1133,
"step": 33000
},
{
"epoch": 62.0,
"eval_accuracy": 0.5737763486959628,
"eval_f1_macro": 0.808341771577789,
"eval_f1_micro": 0.8495672315006705,
"eval_loss": 0.10581369698047638,
"eval_roc_auc": 0.8973203339377094,
"eval_runtime": 655.4703,
"eval_samples_per_second": 4.27,
"eval_steps_per_second": 0.267,
"learning_rate": 0.0001,
"step": 33232
},
{
"epoch": 62.5,
"learning_rate": 0.0001,
"loss": 0.1134,
"step": 33500
},
{
"epoch": 63.0,
"eval_accuracy": 0.5680600214362272,
"eval_f1_macro": 0.8087952156757714,
"eval_f1_micro": 0.847873368777187,
"eval_loss": 0.1063385158777237,
"eval_roc_auc": 0.899139005299654,
"eval_runtime": 659.92,
"eval_samples_per_second": 4.241,
"eval_steps_per_second": 0.265,
"learning_rate": 0.0001,
"step": 33768
},
{
"epoch": 63.43,
"learning_rate": 0.0001,
"loss": 0.1123,
"step": 34000
},
{
"epoch": 64.0,
"eval_accuracy": 0.5702036441586281,
"eval_f1_macro": 0.8121242156828758,
"eval_f1_micro": 0.8503755754785559,
"eval_loss": 0.10543316602706909,
"eval_roc_auc": 0.8997137007632394,
"eval_runtime": 647.0637,
"eval_samples_per_second": 4.326,
"eval_steps_per_second": 0.27,
"learning_rate": 0.0001,
"step": 34304
},
{
"epoch": 64.37,
"learning_rate": 0.0001,
"loss": 0.1141,
"step": 34500
},
{
"epoch": 65.0,
"eval_accuracy": 0.5730618077884959,
"eval_f1_macro": 0.8099395952334975,
"eval_f1_micro": 0.8494330240737371,
"eval_loss": 0.10500979423522949,
"eval_roc_auc": 0.8988602109578501,
"eval_runtime": 649.7608,
"eval_samples_per_second": 4.308,
"eval_steps_per_second": 0.269,
"learning_rate": 0.0001,
"step": 34840
},
{
"epoch": 65.3,
"learning_rate": 0.0001,
"loss": 0.1104,
"step": 35000
},
{
"epoch": 66.0,
"eval_accuracy": 0.5762772418720972,
"eval_f1_macro": 0.8132610218333434,
"eval_f1_micro": 0.8506980430409072,
"eval_loss": 0.10500740259885788,
"eval_roc_auc": 0.8978957335797464,
"eval_runtime": 652.9454,
"eval_samples_per_second": 4.287,
"eval_steps_per_second": 0.268,
"learning_rate": 0.0001,
"step": 35376
},
{
"epoch": 66.23,
"learning_rate": 0.0001,
"loss": 0.1124,
"step": 35500
},
{
"epoch": 67.0,
"eval_accuracy": 0.5669882100750268,
"eval_f1_macro": 0.8163481281680508,
"eval_f1_micro": 0.8512857399748246,
"eval_loss": 0.10600199550390244,
"eval_roc_auc": 0.9035619176711028,
"eval_runtime": 657.7451,
"eval_samples_per_second": 4.255,
"eval_steps_per_second": 0.266,
"learning_rate": 0.0001,
"step": 35912
},
{
"epoch": 67.16,
"learning_rate": 0.0001,
"loss": 0.1111,
"step": 36000
},
{
"epoch": 68.0,
"eval_accuracy": 0.5680600214362272,
"eval_f1_macro": 0.8156635542444352,
"eval_f1_micro": 0.8511868899867453,
"eval_loss": 0.10536548495292664,
"eval_roc_auc": 0.9018856620506218,
"eval_runtime": 656.6156,
"eval_samples_per_second": 4.263,
"eval_steps_per_second": 0.267,
"learning_rate": 0.0001,
"step": 36448
},
{
"epoch": 68.1,
"learning_rate": 0.0001,
"loss": 0.1097,
"step": 36500
},
{
"epoch": 69.0,
"eval_accuracy": 0.5673454805287603,
"eval_f1_macro": 0.8109923908741982,
"eval_f1_micro": 0.8500871237156763,
"eval_loss": 0.1056470051407814,
"eval_roc_auc": 0.9021192764421861,
"eval_runtime": 657.5814,
"eval_samples_per_second": 4.257,
"eval_steps_per_second": 0.266,
"learning_rate": 0.0001,
"step": 36984
},
{
"epoch": 69.03,
"learning_rate": 0.0001,
"loss": 0.1106,
"step": 37000
},
{
"epoch": 69.96,
"learning_rate": 0.0001,
"loss": 0.1096,
"step": 37500
},
{
"epoch": 70.0,
"eval_accuracy": 0.5673454805287603,
"eval_f1_macro": 0.8119494369118779,
"eval_f1_micro": 0.8500514558992676,
"eval_loss": 0.1059202253818512,
"eval_roc_auc": 0.8997405001477684,
"eval_runtime": 661.9686,
"eval_samples_per_second": 4.228,
"eval_steps_per_second": 0.264,
"learning_rate": 0.0001,
"step": 37520
},
{
"epoch": 70.9,
"learning_rate": 0.0001,
"loss": 0.1097,
"step": 38000
},
{
"epoch": 71.0,
"eval_accuracy": 0.5737763486959628,
"eval_f1_macro": 0.8171777200998823,
"eval_f1_micro": 0.8517297200071947,
"eval_loss": 0.10546696186065674,
"eval_roc_auc": 0.9037381432590107,
"eval_runtime": 654.6273,
"eval_samples_per_second": 4.276,
"eval_steps_per_second": 0.267,
"learning_rate": 0.0001,
"step": 38056
},
{
"epoch": 71.83,
"learning_rate": 1e-05,
"loss": 0.1084,
"step": 38500
},
{
"epoch": 72.0,
"eval_accuracy": 0.5630582350839586,
"eval_f1_macro": 0.8063147670569071,
"eval_f1_micro": 0.84692696594993,
"eval_loss": 0.10735420882701874,
"eval_roc_auc": 0.8960515366190057,
"eval_runtime": 658.0374,
"eval_samples_per_second": 4.254,
"eval_steps_per_second": 0.266,
"learning_rate": 1e-05,
"step": 38592
},
{
"epoch": 72.76,
"learning_rate": 1e-05,
"loss": 0.1091,
"step": 39000
},
{
"epoch": 73.0,
"eval_accuracy": 0.5734190782422294,
"eval_f1_macro": 0.8171103344103756,
"eval_f1_micro": 0.8525143029208069,
"eval_loss": 0.10438621789216995,
"eval_roc_auc": 0.9027705800247602,
"eval_runtime": 664.8235,
"eval_samples_per_second": 4.21,
"eval_steps_per_second": 0.263,
"learning_rate": 1e-05,
"step": 39128
},
{
"epoch": 73.69,
"learning_rate": 1e-05,
"loss": 0.1051,
"step": 39500
},
{
"epoch": 74.0,
"eval_accuracy": 0.571632725973562,
"eval_f1_macro": 0.8187356068324974,
"eval_f1_micro": 0.8532934131736527,
"eval_loss": 0.10408420860767365,
"eval_roc_auc": 0.9050291111780234,
"eval_runtime": 673.5967,
"eval_samples_per_second": 4.155,
"eval_steps_per_second": 0.26,
"learning_rate": 1e-05,
"step": 39664
},
{
"epoch": 74.63,
"learning_rate": 1e-05,
"loss": 0.1069,
"step": 40000
},
{
"epoch": 75.0,
"eval_accuracy": 0.5698463737048947,
"eval_f1_macro": 0.8154818599661411,
"eval_f1_micro": 0.8505788712011578,
"eval_loss": 0.10555566847324371,
"eval_roc_auc": 0.9012750442052373,
"eval_runtime": 657.1771,
"eval_samples_per_second": 4.259,
"eval_steps_per_second": 0.266,
"learning_rate": 1e-05,
"step": 40200
},
{
"epoch": 75.56,
"learning_rate": 1e-05,
"loss": 0.1079,
"step": 40500
},
{
"epoch": 76.0,
"eval_accuracy": 0.572347266881029,
"eval_f1_macro": 0.8153806167901401,
"eval_f1_micro": 0.8517114840883114,
"eval_loss": 0.10429207235574722,
"eval_roc_auc": 0.9026594578808756,
"eval_runtime": 664.6613,
"eval_samples_per_second": 4.211,
"eval_steps_per_second": 0.263,
"learning_rate": 1e-05,
"step": 40736
},
{
"epoch": 76.49,
"learning_rate": 1e-05,
"loss": 0.1072,
"step": 41000
},
{
"epoch": 77.0,
"eval_accuracy": 0.5780635941407646,
"eval_f1_macro": 0.8187874152985335,
"eval_f1_micro": 0.853607002716571,
"eval_loss": 0.10398340970277786,
"eval_roc_auc": 0.9026235354990767,
"eval_runtime": 664.3977,
"eval_samples_per_second": 4.213,
"eval_steps_per_second": 0.263,
"learning_rate": 1e-05,
"step": 41272
},
{
"epoch": 77.43,
"learning_rate": 1e-05,
"loss": 0.105,
"step": 41500
},
{
"epoch": 78.0,
"eval_accuracy": 0.5712754555198285,
"eval_f1_macro": 0.8150330415154026,
"eval_f1_micro": 0.8514409743156879,
"eval_loss": 0.10431113094091415,
"eval_roc_auc": 0.9018006351128146,
"eval_runtime": 660.4258,
"eval_samples_per_second": 4.238,
"eval_steps_per_second": 0.265,
"learning_rate": 1e-05,
"step": 41808
},
{
"epoch": 78.36,
"learning_rate": 1e-05,
"loss": 0.1061,
"step": 42000
},
{
"epoch": 79.0,
"eval_accuracy": 0.5734190782422294,
"eval_f1_macro": 0.8180685346674,
"eval_f1_micro": 0.8525934861278649,
"eval_loss": 0.10427288711071014,
"eval_roc_auc": 0.9023377306953573,
"eval_runtime": 662.4252,
"eval_samples_per_second": 4.225,
"eval_steps_per_second": 0.264,
"learning_rate": 1e-05,
"step": 42344
},
{
"epoch": 79.29,
"learning_rate": 1e-05,
"loss": 0.1045,
"step": 42500
},
{
"epoch": 80.0,
"eval_accuracy": 0.5719899964272954,
"eval_f1_macro": 0.8176554657248621,
"eval_f1_micro": 0.8512023142288917,
"eval_loss": 0.10507169365882874,
"eval_roc_auc": 0.9017990921847413,
"eval_runtime": 662.5216,
"eval_samples_per_second": 4.225,
"eval_steps_per_second": 0.264,
"learning_rate": 1e-05,
"step": 42880
},
{
"epoch": 80.22,
"learning_rate": 1e-05,
"loss": 0.1062,
"step": 43000
},
{
"epoch": 81.0,
"eval_accuracy": 0.5694891032511611,
"eval_f1_macro": 0.8157488543073257,
"eval_f1_micro": 0.8500577402297453,
"eval_loss": 0.10686225444078445,
"eval_roc_auc": 0.8984886750378719,
"eval_runtime": 655.8239,
"eval_samples_per_second": 4.268,
"eval_steps_per_second": 0.267,
"learning_rate": 1e-05,
"step": 43416
},
{
"epoch": 81.16,
"learning_rate": 1e-05,
"loss": 0.1057,
"step": 43500
},
{
"epoch": 82.0,
"eval_accuracy": 0.5734190782422294,
"eval_f1_macro": 0.8184447896335341,
"eval_f1_micro": 0.8530242056579975,
"eval_loss": 0.1039666160941124,
"eval_roc_auc": 0.9039006170654286,
"eval_runtime": 659.9314,
"eval_samples_per_second": 4.241,
"eval_steps_per_second": 0.265,
"learning_rate": 1e-05,
"step": 43952
},
{
"epoch": 82.09,
"learning_rate": 1e-05,
"loss": 0.1073,
"step": 44000
},
{
"epoch": 83.0,
"eval_accuracy": 0.5712754555198285,
"eval_f1_macro": 0.8151349204991644,
"eval_f1_micro": 0.8503600634688149,
"eval_loss": 0.10477207601070404,
"eval_roc_auc": 0.8973831902752079,
"eval_runtime": 660.9853,
"eval_samples_per_second": 4.235,
"eval_steps_per_second": 0.265,
"learning_rate": 1e-05,
"step": 44488
},
{
"epoch": 83.02,
"learning_rate": 1e-05,
"loss": 0.1053,
"step": 44500
},
{
"epoch": 83.96,
"learning_rate": 1e-05,
"loss": 0.1059,
"step": 45000
},
{
"epoch": 84.0,
"eval_accuracy": 0.5741336191496963,
"eval_f1_macro": 0.8177615611039802,
"eval_f1_micro": 0.8525637163342773,
"eval_loss": 0.1043851226568222,
"eval_roc_auc": 0.9026457060993854,
"eval_runtime": 661.1385,
"eval_samples_per_second": 4.234,
"eval_steps_per_second": 0.265,
"learning_rate": 1e-05,
"step": 45024
},
{
"epoch": 84.89,
"learning_rate": 1e-05,
"loss": 0.1054,
"step": 45500
},
{
"epoch": 85.0,
"eval_accuracy": 0.5702036441586281,
"eval_f1_macro": 0.8148620661290662,
"eval_f1_micro": 0.8505413415593056,
"eval_loss": 0.10626183450222015,
"eval_roc_auc": 0.9002829286929828,
"eval_runtime": 667.6418,
"eval_samples_per_second": 4.192,
"eval_steps_per_second": 0.262,
"learning_rate": 1e-05,
"step": 45560
},
{
"epoch": 85.82,
"learning_rate": 1e-05,
"loss": 0.1046,
"step": 46000
},
{
"epoch": 86.0,
"eval_accuracy": 0.5727045373347625,
"eval_f1_macro": 0.8160800717420196,
"eval_f1_micro": 0.8525306469231703,
"eval_loss": 0.10432148724794388,
"eval_roc_auc": 0.9003534996708482,
"eval_runtime": 656.1196,
"eval_samples_per_second": 4.266,
"eval_steps_per_second": 0.267,
"learning_rate": 1e-05,
"step": 46096
},
{
"epoch": 86.75,
"learning_rate": 1e-05,
"loss": 0.105,
"step": 46500
},
{
"epoch": 87.0,
"eval_accuracy": 0.5719899964272954,
"eval_f1_macro": 0.8185429098530308,
"eval_f1_micro": 0.8532148646214804,
"eval_loss": 0.10474765300750732,
"eval_roc_auc": 0.904161869591144,
"eval_runtime": 662.4652,
"eval_samples_per_second": 4.225,
"eval_steps_per_second": 0.264,
"learning_rate": 1e-05,
"step": 46632
},
{
"epoch": 87.69,
"learning_rate": 1e-05,
"loss": 0.1029,
"step": 47000
},
{
"epoch": 88.0,
"eval_accuracy": 0.5759199714183637,
"eval_f1_macro": 0.8155503181233792,
"eval_f1_micro": 0.8518207954000958,
"eval_loss": 0.10432733595371246,
"eval_roc_auc": 0.9041143079632091,
"eval_runtime": 659.661,
"eval_samples_per_second": 4.243,
"eval_steps_per_second": 0.265,
"learning_rate": 1e-05,
"step": 47168
},
{
"epoch": 88.62,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1059,
"step": 47500
},
{
"epoch": 89.0,
"eval_accuracy": 0.5787781350482315,
"eval_f1_macro": 0.8178092875894164,
"eval_f1_micro": 0.8538892570946557,
"eval_loss": 0.10400809347629547,
"eval_roc_auc": 0.9033957884012571,
"eval_runtime": 659.0133,
"eval_samples_per_second": 4.247,
"eval_steps_per_second": 0.266,
"learning_rate": 1.0000000000000002e-06,
"step": 47704
},
{
"epoch": 89.55,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.1047,
"step": 48000
},
{
"epoch": 90.0,
"eval_accuracy": 0.5694891032511611,
"eval_f1_macro": 0.8136056007290207,
"eval_f1_micro": 0.8504655833485485,
"eval_loss": 0.10469033569097519,
"eval_roc_auc": 0.8982986976684738,
"eval_runtime": 653.5683,
"eval_samples_per_second": 4.283,
"eval_steps_per_second": 0.268,
"learning_rate": 1.0000000000000002e-06,
"step": 48240
},
{
"epoch": 90.0,
"learning_rate": 1.0000000000000002e-06,
"step": 48240,
"total_flos": 6.13340503159962e+19,
"train_loss": 0.13765035268679188,
"train_runtime": 238196.7751,
"train_samples_per_second": 3.237,
"train_steps_per_second": 0.203
}
],
"logging_steps": 500,
"max_steps": 48240,
"num_train_epochs": 90,
"save_steps": 500,
"total_flos": 6.13340503159962e+19,
"trial_name": null,
"trial_params": null
}