mit-b2-VF2-finetuned-memes / trainer_state.json
paul
End of training
df1bba5
{
"best_metric": 0.8323029366306027,
"best_model_checkpoint": "mit-b2-VF2-finetuned-memes/checkpoint-320",
"epoch": 19.987654320987655,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.49,
"learning_rate": 3e-05,
"loss": 1.5055,
"step": 10
},
{
"epoch": 0.99,
"learning_rate": 6e-05,
"loss": 1.3077,
"step": 20
},
{
"epoch": 0.99,
"eval_accuracy": 0.5548686244204019,
"eval_f1": 0.528612348642737,
"eval_loss": 1.1682769060134888,
"eval_precision": 0.5621381932725592,
"eval_recall": 0.5548686244204019,
"eval_runtime": 24.8047,
"eval_samples_per_second": 52.167,
"eval_steps_per_second": 0.847,
"step": 20
},
{
"epoch": 1.49,
"learning_rate": 9e-05,
"loss": 1.1418,
"step": 30
},
{
"epoch": 1.99,
"learning_rate": 0.00012,
"loss": 0.9359,
"step": 40
},
{
"epoch": 1.99,
"eval_accuracy": 0.6731066460587326,
"eval_f1": 0.6535186565920124,
"eval_loss": 0.8572518825531006,
"eval_precision": 0.6806723513626268,
"eval_recall": 0.6731066460587326,
"eval_runtime": 24.0008,
"eval_samples_per_second": 53.915,
"eval_steps_per_second": 0.875,
"step": 40
},
{
"epoch": 2.49,
"learning_rate": 0.00011666666666666667,
"loss": 0.8433,
"step": 50
},
{
"epoch": 2.99,
"learning_rate": 0.00011333333333333333,
"loss": 0.7219,
"step": 60
},
{
"epoch": 2.99,
"eval_accuracy": 0.7272024729520865,
"eval_f1": 0.7245846570171044,
"eval_loss": 0.7105928659439087,
"eval_precision": 0.7358764523852896,
"eval_recall": 0.7272024729520865,
"eval_runtime": 24.3322,
"eval_samples_per_second": 53.181,
"eval_steps_per_second": 0.863,
"step": 60
},
{
"epoch": 3.49,
"learning_rate": 0.00011,
"loss": 0.639,
"step": 70
},
{
"epoch": 3.99,
"learning_rate": 0.00010666666666666667,
"loss": 0.6013,
"step": 80
},
{
"epoch": 3.99,
"eval_accuracy": 0.7550231839258115,
"eval_f1": 0.7557556279759768,
"eval_loss": 0.6444658041000366,
"eval_precision": 0.7685934069718168,
"eval_recall": 0.7550231839258115,
"eval_runtime": 23.9013,
"eval_samples_per_second": 54.139,
"eval_steps_per_second": 0.879,
"step": 80
},
{
"epoch": 4.49,
"learning_rate": 0.00010333333333333334,
"loss": 0.4977,
"step": 90
},
{
"epoch": 4.99,
"learning_rate": 0.0001,
"loss": 0.5243,
"step": 100
},
{
"epoch": 4.99,
"eval_accuracy": 0.7573415765069552,
"eval_f1": 0.7583802674435394,
"eval_loss": 0.6717090606689453,
"eval_precision": 0.8077053699577921,
"eval_recall": 0.7573415765069552,
"eval_runtime": 24.2773,
"eval_samples_per_second": 53.301,
"eval_steps_per_second": 0.865,
"step": 100
},
{
"epoch": 5.49,
"learning_rate": 9.666666666666667e-05,
"loss": 0.4352,
"step": 110
},
{
"epoch": 5.99,
"learning_rate": 9.333333333333334e-05,
"loss": 0.4409,
"step": 120
},
{
"epoch": 5.99,
"eval_accuracy": 0.8068006182380216,
"eval_f1": 0.7989048233772276,
"eval_loss": 0.531519889831543,
"eval_precision": 0.8027399261022145,
"eval_recall": 0.8068006182380216,
"eval_runtime": 24.4159,
"eval_samples_per_second": 52.998,
"eval_steps_per_second": 0.86,
"step": 120
},
{
"epoch": 6.49,
"learning_rate": 9e-05,
"loss": 0.3684,
"step": 130
},
{
"epoch": 6.99,
"learning_rate": 8.666666666666667e-05,
"loss": 0.3325,
"step": 140
},
{
"epoch": 6.99,
"eval_accuracy": 0.8230293663060279,
"eval_f1": 0.815761738328179,
"eval_loss": 0.5158553719520569,
"eval_precision": 0.8236186656882729,
"eval_recall": 0.8230293663060279,
"eval_runtime": 24.8705,
"eval_samples_per_second": 52.029,
"eval_steps_per_second": 0.844,
"step": 140
},
{
"epoch": 7.49,
"learning_rate": 8.333333333333333e-05,
"loss": 0.304,
"step": 150
},
{
"epoch": 7.99,
"learning_rate": 7.999999999999999e-05,
"loss": 0.2719,
"step": 160
},
{
"epoch": 7.99,
"eval_accuracy": 0.821483771251932,
"eval_f1": 0.8202048206608414,
"eval_loss": 0.5249665379524231,
"eval_precision": 0.8226765366850811,
"eval_recall": 0.821483771251932,
"eval_runtime": 24.5067,
"eval_samples_per_second": 52.802,
"eval_steps_per_second": 0.857,
"step": 160
},
{
"epoch": 8.49,
"learning_rate": 7.666666666666667e-05,
"loss": 0.2209,
"step": 170
},
{
"epoch": 8.99,
"learning_rate": 7.333333333333334e-05,
"loss": 0.242,
"step": 180
},
{
"epoch": 8.99,
"eval_accuracy": 0.8276661514683153,
"eval_f1": 0.8267595186206215,
"eval_loss": 0.5087464451789856,
"eval_precision": 0.8259545635423114,
"eval_recall": 0.8276661514683153,
"eval_runtime": 24.2235,
"eval_samples_per_second": 53.419,
"eval_steps_per_second": 0.867,
"step": 180
},
{
"epoch": 9.49,
"learning_rate": 7.000000000000001e-05,
"loss": 0.2021,
"step": 190
},
{
"epoch": 9.99,
"learning_rate": 6.666666666666667e-05,
"loss": 0.2247,
"step": 200
},
{
"epoch": 9.99,
"eval_accuracy": 0.821483771251932,
"eval_f1": 0.8217910575245788,
"eval_loss": 0.5312910079956055,
"eval_precision": 0.8274625356055352,
"eval_recall": 0.821483771251932,
"eval_runtime": 23.8959,
"eval_samples_per_second": 54.152,
"eval_steps_per_second": 0.879,
"step": 200
},
{
"epoch": 10.49,
"learning_rate": 6.333333333333333e-05,
"loss": 0.1864,
"step": 210
},
{
"epoch": 10.99,
"learning_rate": 6e-05,
"loss": 0.1955,
"step": 220
},
{
"epoch": 10.99,
"eval_accuracy": 0.8129829984544049,
"eval_f1": 0.8073030128905387,
"eval_loss": 0.6167330741882324,
"eval_precision": 0.8062473668401103,
"eval_recall": 0.8129829984544049,
"eval_runtime": 24.1623,
"eval_samples_per_second": 53.555,
"eval_steps_per_second": 0.869,
"step": 220
},
{
"epoch": 11.49,
"learning_rate": 5.6666666666666664e-05,
"loss": 0.1668,
"step": 230
},
{
"epoch": 11.99,
"learning_rate": 5.333333333333333e-05,
"loss": 0.1567,
"step": 240
},
{
"epoch": 11.99,
"eval_accuracy": 0.8168469860896446,
"eval_f1": 0.8172720638114461,
"eval_loss": 0.585921049118042,
"eval_precision": 0.818523777219596,
"eval_recall": 0.8168469860896446,
"eval_runtime": 24.2627,
"eval_samples_per_second": 53.333,
"eval_steps_per_second": 0.866,
"step": 240
},
{
"epoch": 12.49,
"learning_rate": 5e-05,
"loss": 0.1458,
"step": 250
},
{
"epoch": 12.99,
"learning_rate": 4.666666666666667e-05,
"loss": 0.1479,
"step": 260
},
{
"epoch": 12.99,
"eval_accuracy": 0.821483771251932,
"eval_f1": 0.8178479998623709,
"eval_loss": 0.5937514901161194,
"eval_precision": 0.8169057022013864,
"eval_recall": 0.821483771251932,
"eval_runtime": 24.5791,
"eval_samples_per_second": 52.646,
"eval_steps_per_second": 0.854,
"step": 260
},
{
"epoch": 13.49,
"learning_rate": 4.3333333333333334e-05,
"loss": 0.1242,
"step": 270
},
{
"epoch": 13.99,
"learning_rate": 3.9999999999999996e-05,
"loss": 0.1241,
"step": 280
},
{
"epoch": 13.99,
"eval_accuracy": 0.8261205564142194,
"eval_f1": 0.8238689741768054,
"eval_loss": 0.6187295317649841,
"eval_precision": 0.8233844424179447,
"eval_recall": 0.8261205564142194,
"eval_runtime": 23.6967,
"eval_samples_per_second": 54.607,
"eval_steps_per_second": 0.886,
"step": 280
},
{
"epoch": 14.49,
"learning_rate": 3.666666666666667e-05,
"loss": 0.1133,
"step": 290
},
{
"epoch": 14.99,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.1114,
"step": 300
},
{
"epoch": 14.99,
"eval_accuracy": 0.8261205564142194,
"eval_f1": 0.8293208039295025,
"eval_loss": 0.6419451832771301,
"eval_precision": 0.8351235142682792,
"eval_recall": 0.8261205564142194,
"eval_runtime": 24.6341,
"eval_samples_per_second": 52.529,
"eval_steps_per_second": 0.852,
"step": 300
},
{
"epoch": 15.49,
"learning_rate": 3e-05,
"loss": 0.1134,
"step": 310
},
{
"epoch": 15.99,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.1022,
"step": 320
},
{
"epoch": 15.99,
"eval_accuracy": 0.8323029366306027,
"eval_f1": 0.8293880616510334,
"eval_loss": 0.6321794986724854,
"eval_precision": 0.8283975183256338,
"eval_recall": 0.8323029366306027,
"eval_runtime": 24.0004,
"eval_samples_per_second": 53.916,
"eval_steps_per_second": 0.875,
"step": 320
},
{
"epoch": 16.49,
"learning_rate": 2.3333333333333336e-05,
"loss": 0.099,
"step": 330
},
{
"epoch": 16.99,
"learning_rate": 1.9999999999999998e-05,
"loss": 0.0941,
"step": 340
},
{
"epoch": 16.99,
"eval_accuracy": 0.8268933539412674,
"eval_f1": 0.8263211093818184,
"eval_loss": 0.6594778895378113,
"eval_precision": 0.826589853355648,
"eval_recall": 0.8268933539412674,
"eval_runtime": 24.9004,
"eval_samples_per_second": 51.967,
"eval_steps_per_second": 0.843,
"step": 340
},
{
"epoch": 17.49,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0908,
"step": 350
},
{
"epoch": 17.99,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0935,
"step": 360
},
{
"epoch": 17.99,
"eval_accuracy": 0.8268933539412674,
"eval_f1": 0.8236963320695928,
"eval_loss": 0.6673519015312195,
"eval_precision": 0.8217985118762411,
"eval_recall": 0.8268933539412674,
"eval_runtime": 24.9117,
"eval_samples_per_second": 51.943,
"eval_steps_per_second": 0.843,
"step": 360
},
{
"epoch": 18.49,
"learning_rate": 9.999999999999999e-06,
"loss": 0.0893,
"step": 370
},
{
"epoch": 18.99,
"learning_rate": 6.666666666666667e-06,
"loss": 0.089,
"step": 380
},
{
"epoch": 18.99,
"eval_accuracy": 0.8253477588871716,
"eval_f1": 0.8235241860756777,
"eval_loss": 0.6533293724060059,
"eval_precision": 0.8222313831803589,
"eval_recall": 0.8253477588871716,
"eval_runtime": 24.9409,
"eval_samples_per_second": 51.883,
"eval_steps_per_second": 0.842,
"step": 380
},
{
"epoch": 19.49,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.0861,
"step": 390
},
{
"epoch": 19.99,
"learning_rate": 0.0,
"loss": 0.0794,
"step": 400
},
{
"epoch": 19.99,
"eval_accuracy": 0.8307573415765069,
"eval_f1": 0.8286939083150942,
"eval_loss": 0.6546534895896912,
"eval_precision": 0.8272186656187493,
"eval_recall": 0.8307573415765069,
"eval_runtime": 25.4487,
"eval_samples_per_second": 50.847,
"eval_steps_per_second": 0.825,
"step": 400
},
{
"epoch": 19.99,
"step": 400,
"total_flos": 1.1809647563061068e+19,
"train_loss": 0.35424828216433524,
"train_runtime": 5673.0945,
"train_samples_per_second": 18.24,
"train_steps_per_second": 0.071
}
],
"max_steps": 400,
"num_train_epochs": 20,
"total_flos": 1.1809647563061068e+19,
"trial_name": null,
"trial_params": null
}