{ "best_metric": 0.8323029366306027, "best_model_checkpoint": "mit-b2-VF2-finetuned-memes/checkpoint-320", "epoch": 19.987654320987655, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.49, "learning_rate": 3e-05, "loss": 1.5055, "step": 10 }, { "epoch": 0.99, "learning_rate": 6e-05, "loss": 1.3077, "step": 20 }, { "epoch": 0.99, "eval_accuracy": 0.5548686244204019, "eval_f1": 0.528612348642737, "eval_loss": 1.1682769060134888, "eval_precision": 0.5621381932725592, "eval_recall": 0.5548686244204019, "eval_runtime": 24.8047, "eval_samples_per_second": 52.167, "eval_steps_per_second": 0.847, "step": 20 }, { "epoch": 1.49, "learning_rate": 9e-05, "loss": 1.1418, "step": 30 }, { "epoch": 1.99, "learning_rate": 0.00012, "loss": 0.9359, "step": 40 }, { "epoch": 1.99, "eval_accuracy": 0.6731066460587326, "eval_f1": 0.6535186565920124, "eval_loss": 0.8572518825531006, "eval_precision": 0.6806723513626268, "eval_recall": 0.6731066460587326, "eval_runtime": 24.0008, "eval_samples_per_second": 53.915, "eval_steps_per_second": 0.875, "step": 40 }, { "epoch": 2.49, "learning_rate": 0.00011666666666666667, "loss": 0.8433, "step": 50 }, { "epoch": 2.99, "learning_rate": 0.00011333333333333333, "loss": 0.7219, "step": 60 }, { "epoch": 2.99, "eval_accuracy": 0.7272024729520865, "eval_f1": 0.7245846570171044, "eval_loss": 0.7105928659439087, "eval_precision": 0.7358764523852896, "eval_recall": 0.7272024729520865, "eval_runtime": 24.3322, "eval_samples_per_second": 53.181, "eval_steps_per_second": 0.863, "step": 60 }, { "epoch": 3.49, "learning_rate": 0.00011, "loss": 0.639, "step": 70 }, { "epoch": 3.99, "learning_rate": 0.00010666666666666667, "loss": 0.6013, "step": 80 }, { "epoch": 3.99, "eval_accuracy": 0.7550231839258115, "eval_f1": 0.7557556279759768, "eval_loss": 0.6444658041000366, "eval_precision": 0.7685934069718168, "eval_recall": 0.7550231839258115, "eval_runtime": 23.9013, "eval_samples_per_second": 54.139, "eval_steps_per_second": 0.879, "step": 80 }, { "epoch": 4.49, "learning_rate": 0.00010333333333333334, "loss": 0.4977, "step": 90 }, { "epoch": 4.99, "learning_rate": 0.0001, "loss": 0.5243, "step": 100 }, { "epoch": 4.99, "eval_accuracy": 0.7573415765069552, "eval_f1": 0.7583802674435394, "eval_loss": 0.6717090606689453, "eval_precision": 0.8077053699577921, "eval_recall": 0.7573415765069552, "eval_runtime": 24.2773, "eval_samples_per_second": 53.301, "eval_steps_per_second": 0.865, "step": 100 }, { "epoch": 5.49, "learning_rate": 9.666666666666667e-05, "loss": 0.4352, "step": 110 }, { "epoch": 5.99, "learning_rate": 9.333333333333334e-05, "loss": 0.4409, "step": 120 }, { "epoch": 5.99, "eval_accuracy": 0.8068006182380216, "eval_f1": 0.7989048233772276, "eval_loss": 0.531519889831543, "eval_precision": 0.8027399261022145, "eval_recall": 0.8068006182380216, "eval_runtime": 24.4159, "eval_samples_per_second": 52.998, "eval_steps_per_second": 0.86, "step": 120 }, { "epoch": 6.49, "learning_rate": 9e-05, "loss": 0.3684, "step": 130 }, { "epoch": 6.99, "learning_rate": 8.666666666666667e-05, "loss": 0.3325, "step": 140 }, { "epoch": 6.99, "eval_accuracy": 0.8230293663060279, "eval_f1": 0.815761738328179, "eval_loss": 0.5158553719520569, "eval_precision": 0.8236186656882729, "eval_recall": 0.8230293663060279, "eval_runtime": 24.8705, "eval_samples_per_second": 52.029, "eval_steps_per_second": 0.844, "step": 140 }, { "epoch": 7.49, "learning_rate": 8.333333333333333e-05, "loss": 0.304, "step": 150 }, { "epoch": 7.99, "learning_rate": 7.999999999999999e-05, "loss": 0.2719, "step": 160 }, { "epoch": 7.99, "eval_accuracy": 0.821483771251932, "eval_f1": 0.8202048206608414, "eval_loss": 0.5249665379524231, "eval_precision": 0.8226765366850811, "eval_recall": 0.821483771251932, "eval_runtime": 24.5067, "eval_samples_per_second": 52.802, "eval_steps_per_second": 0.857, "step": 160 }, { "epoch": 8.49, "learning_rate": 7.666666666666667e-05, "loss": 0.2209, "step": 170 }, { "epoch": 8.99, "learning_rate": 7.333333333333334e-05, "loss": 0.242, "step": 180 }, { "epoch": 8.99, "eval_accuracy": 0.8276661514683153, "eval_f1": 0.8267595186206215, "eval_loss": 0.5087464451789856, "eval_precision": 0.8259545635423114, "eval_recall": 0.8276661514683153, "eval_runtime": 24.2235, "eval_samples_per_second": 53.419, "eval_steps_per_second": 0.867, "step": 180 }, { "epoch": 9.49, "learning_rate": 7.000000000000001e-05, "loss": 0.2021, "step": 190 }, { "epoch": 9.99, "learning_rate": 6.666666666666667e-05, "loss": 0.2247, "step": 200 }, { "epoch": 9.99, "eval_accuracy": 0.821483771251932, "eval_f1": 0.8217910575245788, "eval_loss": 0.5312910079956055, "eval_precision": 0.8274625356055352, "eval_recall": 0.821483771251932, "eval_runtime": 23.8959, "eval_samples_per_second": 54.152, "eval_steps_per_second": 0.879, "step": 200 }, { "epoch": 10.49, "learning_rate": 6.333333333333333e-05, "loss": 0.1864, "step": 210 }, { "epoch": 10.99, "learning_rate": 6e-05, "loss": 0.1955, "step": 220 }, { "epoch": 10.99, "eval_accuracy": 0.8129829984544049, "eval_f1": 0.8073030128905387, "eval_loss": 0.6167330741882324, "eval_precision": 0.8062473668401103, "eval_recall": 0.8129829984544049, "eval_runtime": 24.1623, "eval_samples_per_second": 53.555, "eval_steps_per_second": 0.869, "step": 220 }, { "epoch": 11.49, "learning_rate": 5.6666666666666664e-05, "loss": 0.1668, "step": 230 }, { "epoch": 11.99, "learning_rate": 5.333333333333333e-05, "loss": 0.1567, "step": 240 }, { "epoch": 11.99, "eval_accuracy": 0.8168469860896446, "eval_f1": 0.8172720638114461, "eval_loss": 0.585921049118042, "eval_precision": 0.818523777219596, "eval_recall": 0.8168469860896446, "eval_runtime": 24.2627, "eval_samples_per_second": 53.333, "eval_steps_per_second": 0.866, "step": 240 }, { "epoch": 12.49, "learning_rate": 5e-05, "loss": 0.1458, "step": 250 }, { "epoch": 12.99, "learning_rate": 4.666666666666667e-05, "loss": 0.1479, "step": 260 }, { "epoch": 12.99, "eval_accuracy": 0.821483771251932, "eval_f1": 0.8178479998623709, "eval_loss": 0.5937514901161194, "eval_precision": 0.8169057022013864, "eval_recall": 0.821483771251932, "eval_runtime": 24.5791, "eval_samples_per_second": 52.646, "eval_steps_per_second": 0.854, "step": 260 }, { "epoch": 13.49, "learning_rate": 4.3333333333333334e-05, "loss": 0.1242, "step": 270 }, { "epoch": 13.99, "learning_rate": 3.9999999999999996e-05, "loss": 0.1241, "step": 280 }, { "epoch": 13.99, "eval_accuracy": 0.8261205564142194, "eval_f1": 0.8238689741768054, "eval_loss": 0.6187295317649841, "eval_precision": 0.8233844424179447, "eval_recall": 0.8261205564142194, "eval_runtime": 23.6967, "eval_samples_per_second": 54.607, "eval_steps_per_second": 0.886, "step": 280 }, { "epoch": 14.49, "learning_rate": 3.666666666666667e-05, "loss": 0.1133, "step": 290 }, { "epoch": 14.99, "learning_rate": 3.3333333333333335e-05, "loss": 0.1114, "step": 300 }, { "epoch": 14.99, "eval_accuracy": 0.8261205564142194, "eval_f1": 0.8293208039295025, "eval_loss": 0.6419451832771301, "eval_precision": 0.8351235142682792, "eval_recall": 0.8261205564142194, "eval_runtime": 24.6341, "eval_samples_per_second": 52.529, "eval_steps_per_second": 0.852, "step": 300 }, { "epoch": 15.49, "learning_rate": 3e-05, "loss": 0.1134, "step": 310 }, { "epoch": 15.99, "learning_rate": 2.6666666666666667e-05, "loss": 0.1022, "step": 320 }, { "epoch": 15.99, "eval_accuracy": 0.8323029366306027, "eval_f1": 0.8293880616510334, "eval_loss": 0.6321794986724854, "eval_precision": 0.8283975183256338, "eval_recall": 0.8323029366306027, "eval_runtime": 24.0004, "eval_samples_per_second": 53.916, "eval_steps_per_second": 0.875, "step": 320 }, { "epoch": 16.49, "learning_rate": 2.3333333333333336e-05, "loss": 0.099, "step": 330 }, { "epoch": 16.99, "learning_rate": 1.9999999999999998e-05, "loss": 0.0941, "step": 340 }, { "epoch": 16.99, "eval_accuracy": 0.8268933539412674, "eval_f1": 0.8263211093818184, "eval_loss": 0.6594778895378113, "eval_precision": 0.826589853355648, "eval_recall": 0.8268933539412674, "eval_runtime": 24.9004, "eval_samples_per_second": 51.967, "eval_steps_per_second": 0.843, "step": 340 }, { "epoch": 17.49, "learning_rate": 1.6666666666666667e-05, "loss": 0.0908, "step": 350 }, { "epoch": 17.99, "learning_rate": 1.3333333333333333e-05, "loss": 0.0935, "step": 360 }, { "epoch": 17.99, "eval_accuracy": 0.8268933539412674, "eval_f1": 0.8236963320695928, "eval_loss": 0.6673519015312195, "eval_precision": 0.8217985118762411, "eval_recall": 0.8268933539412674, "eval_runtime": 24.9117, "eval_samples_per_second": 51.943, "eval_steps_per_second": 0.843, "step": 360 }, { "epoch": 18.49, "learning_rate": 9.999999999999999e-06, "loss": 0.0893, "step": 370 }, { "epoch": 18.99, "learning_rate": 6.666666666666667e-06, "loss": 0.089, "step": 380 }, { "epoch": 18.99, "eval_accuracy": 0.8253477588871716, "eval_f1": 0.8235241860756777, "eval_loss": 0.6533293724060059, "eval_precision": 0.8222313831803589, "eval_recall": 0.8253477588871716, "eval_runtime": 24.9409, "eval_samples_per_second": 51.883, "eval_steps_per_second": 0.842, "step": 380 }, { "epoch": 19.49, "learning_rate": 3.3333333333333333e-06, "loss": 0.0861, "step": 390 }, { "epoch": 19.99, "learning_rate": 0.0, "loss": 0.0794, "step": 400 }, { "epoch": 19.99, "eval_accuracy": 0.8307573415765069, "eval_f1": 0.8286939083150942, "eval_loss": 0.6546534895896912, "eval_precision": 0.8272186656187493, "eval_recall": 0.8307573415765069, "eval_runtime": 25.4487, "eval_samples_per_second": 50.847, "eval_steps_per_second": 0.825, "step": 400 }, { "epoch": 19.99, "step": 400, "total_flos": 1.1809647563061068e+19, "train_loss": 0.35424828216433524, "train_runtime": 5673.0945, "train_samples_per_second": 18.24, "train_steps_per_second": 0.071 } ], "max_steps": 400, "num_train_epochs": 20, "total_flos": 1.1809647563061068e+19, "trial_name": null, "trial_params": null }