|
{ |
|
"best_metric": 0.8323029366306027, |
|
"best_model_checkpoint": "mit-b2-VF2-finetuned-memes/checkpoint-320", |
|
"epoch": 19.987654320987655, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5055, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6e-05, |
|
"loss": 1.3077, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5548686244204019, |
|
"eval_f1": 0.528612348642737, |
|
"eval_loss": 1.1682769060134888, |
|
"eval_precision": 0.5621381932725592, |
|
"eval_recall": 0.5548686244204019, |
|
"eval_runtime": 24.8047, |
|
"eval_samples_per_second": 52.167, |
|
"eval_steps_per_second": 0.847, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9e-05, |
|
"loss": 1.1418, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00012, |
|
"loss": 0.9359, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.6731066460587326, |
|
"eval_f1": 0.6535186565920124, |
|
"eval_loss": 0.8572518825531006, |
|
"eval_precision": 0.6806723513626268, |
|
"eval_recall": 0.6731066460587326, |
|
"eval_runtime": 24.0008, |
|
"eval_samples_per_second": 53.915, |
|
"eval_steps_per_second": 0.875, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00011666666666666667, |
|
"loss": 0.8433, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00011333333333333333, |
|
"loss": 0.7219, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.7272024729520865, |
|
"eval_f1": 0.7245846570171044, |
|
"eval_loss": 0.7105928659439087, |
|
"eval_precision": 0.7358764523852896, |
|
"eval_recall": 0.7272024729520865, |
|
"eval_runtime": 24.3322, |
|
"eval_samples_per_second": 53.181, |
|
"eval_steps_per_second": 0.863, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.00011, |
|
"loss": 0.639, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.00010666666666666667, |
|
"loss": 0.6013, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.7550231839258115, |
|
"eval_f1": 0.7557556279759768, |
|
"eval_loss": 0.6444658041000366, |
|
"eval_precision": 0.7685934069718168, |
|
"eval_recall": 0.7550231839258115, |
|
"eval_runtime": 23.9013, |
|
"eval_samples_per_second": 54.139, |
|
"eval_steps_per_second": 0.879, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00010333333333333334, |
|
"loss": 0.4977, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5243, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.7573415765069552, |
|
"eval_f1": 0.7583802674435394, |
|
"eval_loss": 0.6717090606689453, |
|
"eval_precision": 0.8077053699577921, |
|
"eval_recall": 0.7573415765069552, |
|
"eval_runtime": 24.2773, |
|
"eval_samples_per_second": 53.301, |
|
"eval_steps_per_second": 0.865, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 0.4352, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.4409, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.8068006182380216, |
|
"eval_f1": 0.7989048233772276, |
|
"eval_loss": 0.531519889831543, |
|
"eval_precision": 0.8027399261022145, |
|
"eval_recall": 0.8068006182380216, |
|
"eval_runtime": 24.4159, |
|
"eval_samples_per_second": 52.998, |
|
"eval_steps_per_second": 0.86, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 9e-05, |
|
"loss": 0.3684, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.3325, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.8230293663060279, |
|
"eval_f1": 0.815761738328179, |
|
"eval_loss": 0.5158553719520569, |
|
"eval_precision": 0.8236186656882729, |
|
"eval_recall": 0.8230293663060279, |
|
"eval_runtime": 24.8705, |
|
"eval_samples_per_second": 52.029, |
|
"eval_steps_per_second": 0.844, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 8.333333333333333e-05, |
|
"loss": 0.304, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 7.999999999999999e-05, |
|
"loss": 0.2719, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_accuracy": 0.821483771251932, |
|
"eval_f1": 0.8202048206608414, |
|
"eval_loss": 0.5249665379524231, |
|
"eval_precision": 0.8226765366850811, |
|
"eval_recall": 0.821483771251932, |
|
"eval_runtime": 24.5067, |
|
"eval_samples_per_second": 52.802, |
|
"eval_steps_per_second": 0.857, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 7.666666666666667e-05, |
|
"loss": 0.2209, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 7.333333333333334e-05, |
|
"loss": 0.242, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.8276661514683153, |
|
"eval_f1": 0.8267595186206215, |
|
"eval_loss": 0.5087464451789856, |
|
"eval_precision": 0.8259545635423114, |
|
"eval_recall": 0.8276661514683153, |
|
"eval_runtime": 24.2235, |
|
"eval_samples_per_second": 53.419, |
|
"eval_steps_per_second": 0.867, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 7.000000000000001e-05, |
|
"loss": 0.2021, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.2247, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.821483771251932, |
|
"eval_f1": 0.8217910575245788, |
|
"eval_loss": 0.5312910079956055, |
|
"eval_precision": 0.8274625356055352, |
|
"eval_recall": 0.821483771251932, |
|
"eval_runtime": 23.8959, |
|
"eval_samples_per_second": 54.152, |
|
"eval_steps_per_second": 0.879, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 0.1864, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 6e-05, |
|
"loss": 0.1955, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.8129829984544049, |
|
"eval_f1": 0.8073030128905387, |
|
"eval_loss": 0.6167330741882324, |
|
"eval_precision": 0.8062473668401103, |
|
"eval_recall": 0.8129829984544049, |
|
"eval_runtime": 24.1623, |
|
"eval_samples_per_second": 53.555, |
|
"eval_steps_per_second": 0.869, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 5.6666666666666664e-05, |
|
"loss": 0.1668, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.1567, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.8168469860896446, |
|
"eval_f1": 0.8172720638114461, |
|
"eval_loss": 0.585921049118042, |
|
"eval_precision": 0.818523777219596, |
|
"eval_recall": 0.8168469860896446, |
|
"eval_runtime": 24.2627, |
|
"eval_samples_per_second": 53.333, |
|
"eval_steps_per_second": 0.866, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1458, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.1479, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.821483771251932, |
|
"eval_f1": 0.8178479998623709, |
|
"eval_loss": 0.5937514901161194, |
|
"eval_precision": 0.8169057022013864, |
|
"eval_recall": 0.821483771251932, |
|
"eval_runtime": 24.5791, |
|
"eval_samples_per_second": 52.646, |
|
"eval_steps_per_second": 0.854, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.1242, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 3.9999999999999996e-05, |
|
"loss": 0.1241, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.8261205564142194, |
|
"eval_f1": 0.8238689741768054, |
|
"eval_loss": 0.6187295317649841, |
|
"eval_precision": 0.8233844424179447, |
|
"eval_recall": 0.8261205564142194, |
|
"eval_runtime": 23.6967, |
|
"eval_samples_per_second": 54.607, |
|
"eval_steps_per_second": 0.886, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3.666666666666667e-05, |
|
"loss": 0.1133, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1114, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.8261205564142194, |
|
"eval_f1": 0.8293208039295025, |
|
"eval_loss": 0.6419451832771301, |
|
"eval_precision": 0.8351235142682792, |
|
"eval_recall": 0.8261205564142194, |
|
"eval_runtime": 24.6341, |
|
"eval_samples_per_second": 52.529, |
|
"eval_steps_per_second": 0.852, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1134, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.1022, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_accuracy": 0.8323029366306027, |
|
"eval_f1": 0.8293880616510334, |
|
"eval_loss": 0.6321794986724854, |
|
"eval_precision": 0.8283975183256338, |
|
"eval_recall": 0.8323029366306027, |
|
"eval_runtime": 24.0004, |
|
"eval_samples_per_second": 53.916, |
|
"eval_steps_per_second": 0.875, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.099, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 0.0941, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.8268933539412674, |
|
"eval_f1": 0.8263211093818184, |
|
"eval_loss": 0.6594778895378113, |
|
"eval_precision": 0.826589853355648, |
|
"eval_recall": 0.8268933539412674, |
|
"eval_runtime": 24.9004, |
|
"eval_samples_per_second": 51.967, |
|
"eval_steps_per_second": 0.843, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0908, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0935, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.8268933539412674, |
|
"eval_f1": 0.8236963320695928, |
|
"eval_loss": 0.6673519015312195, |
|
"eval_precision": 0.8217985118762411, |
|
"eval_recall": 0.8268933539412674, |
|
"eval_runtime": 24.9117, |
|
"eval_samples_per_second": 51.943, |
|
"eval_steps_per_second": 0.843, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 0.0893, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.089, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.8253477588871716, |
|
"eval_f1": 0.8235241860756777, |
|
"eval_loss": 0.6533293724060059, |
|
"eval_precision": 0.8222313831803589, |
|
"eval_recall": 0.8253477588871716, |
|
"eval_runtime": 24.9409, |
|
"eval_samples_per_second": 51.883, |
|
"eval_steps_per_second": 0.842, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0861, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.0794, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_accuracy": 0.8307573415765069, |
|
"eval_f1": 0.8286939083150942, |
|
"eval_loss": 0.6546534895896912, |
|
"eval_precision": 0.8272186656187493, |
|
"eval_recall": 0.8307573415765069, |
|
"eval_runtime": 25.4487, |
|
"eval_samples_per_second": 50.847, |
|
"eval_steps_per_second": 0.825, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"step": 400, |
|
"total_flos": 1.1809647563061068e+19, |
|
"train_loss": 0.35424828216433524, |
|
"train_runtime": 5673.0945, |
|
"train_samples_per_second": 18.24, |
|
"train_steps_per_second": 0.071 |
|
} |
|
], |
|
"max_steps": 400, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.1809647563061068e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|