electra-adapter / trainer_state.json
tejaskamtam's picture
End of training
08afa7f verified
{
"best_metric": 2.0336806774139404,
"best_model_checkpoint": "finetuning/output/electra-adapter-finetuned_xe_ey_fae/checkpoint-19000",
"epoch": 2.642433616911575,
"eval_steps": 500,
"global_step": 20500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 9.785167998625076e-06,
"loss": 3.9488,
"step": 500
},
{
"epoch": 0.06,
"eval_accuracy": 0.5508950432882589,
"eval_loss": 3.1499977111816406,
"eval_runtime": 85.1217,
"eval_samples_per_second": 182.386,
"eval_steps_per_second": 22.803,
"step": 500
},
{
"epoch": 0.13,
"learning_rate": 9.57033599725015e-06,
"loss": 2.942,
"step": 1000
},
{
"epoch": 0.13,
"eval_accuracy": 0.5680209177510359,
"eval_loss": 2.584392547607422,
"eval_runtime": 79.4716,
"eval_samples_per_second": 195.353,
"eval_steps_per_second": 24.424,
"step": 1000
},
{
"epoch": 0.19,
"learning_rate": 9.355503995875225e-06,
"loss": 2.6751,
"step": 1500
},
{
"epoch": 0.19,
"eval_accuracy": 0.578970986434046,
"eval_loss": 2.444335699081421,
"eval_runtime": 87.5675,
"eval_samples_per_second": 177.292,
"eval_steps_per_second": 22.166,
"step": 1500
},
{
"epoch": 0.26,
"learning_rate": 9.140671994500302e-06,
"loss": 2.582,
"step": 2000
},
{
"epoch": 0.26,
"eval_accuracy": 0.5868782143731802,
"eval_loss": 2.3700673580169678,
"eval_runtime": 83.7436,
"eval_samples_per_second": 185.387,
"eval_steps_per_second": 23.178,
"step": 2000
},
{
"epoch": 0.32,
"learning_rate": 8.926269657128126e-06,
"loss": 2.5267,
"step": 2500
},
{
"epoch": 0.32,
"eval_accuracy": 0.5937291646823507,
"eval_loss": 2.309689998626709,
"eval_runtime": 81.3517,
"eval_samples_per_second": 190.838,
"eval_steps_per_second": 23.859,
"step": 2500
},
{
"epoch": 0.39,
"learning_rate": 8.711437655753203e-06,
"loss": 2.4722,
"step": 3000
},
{
"epoch": 0.39,
"eval_accuracy": 0.5985969269659629,
"eval_loss": 2.2695114612579346,
"eval_runtime": 87.4381,
"eval_samples_per_second": 177.554,
"eval_steps_per_second": 22.199,
"step": 3000
},
{
"epoch": 0.45,
"learning_rate": 8.497035318381027e-06,
"loss": 2.4289,
"step": 3500
},
{
"epoch": 0.45,
"eval_accuracy": 0.602404170197503,
"eval_loss": 2.2328779697418213,
"eval_runtime": 83.7759,
"eval_samples_per_second": 185.316,
"eval_steps_per_second": 23.169,
"step": 3500
},
{
"epoch": 0.52,
"learning_rate": 8.282203317006102e-06,
"loss": 2.404,
"step": 4000
},
{
"epoch": 0.52,
"eval_accuracy": 0.6055254061674608,
"eval_loss": 2.206317901611328,
"eval_runtime": 87.3965,
"eval_samples_per_second": 177.639,
"eval_steps_per_second": 22.209,
"step": 4000
},
{
"epoch": 0.58,
"learning_rate": 8.067371315631177e-06,
"loss": 2.3826,
"step": 4500
},
{
"epoch": 0.58,
"eval_accuracy": 0.6086694296803393,
"eval_loss": 2.183983087539673,
"eval_runtime": 87.0314,
"eval_samples_per_second": 178.384,
"eval_steps_per_second": 22.302,
"step": 4500
},
{
"epoch": 0.64,
"learning_rate": 7.852539314256252e-06,
"loss": 2.3633,
"step": 5000
},
{
"epoch": 0.64,
"eval_accuracy": 0.6108753723178051,
"eval_loss": 2.1645586490631104,
"eval_runtime": 83.9383,
"eval_samples_per_second": 184.957,
"eval_steps_per_second": 23.124,
"step": 5000
},
{
"epoch": 0.71,
"learning_rate": 7.637707312881327e-06,
"loss": 2.3425,
"step": 5500
},
{
"epoch": 0.71,
"eval_accuracy": 0.6121162378522405,
"eval_loss": 2.155695676803589,
"eval_runtime": 87.4417,
"eval_samples_per_second": 177.547,
"eval_steps_per_second": 22.198,
"step": 5500
},
{
"epoch": 0.77,
"learning_rate": 7.4228753115064025e-06,
"loss": 2.333,
"step": 6000
},
{
"epoch": 0.77,
"eval_accuracy": 0.6140775893820937,
"eval_loss": 2.1349785327911377,
"eval_runtime": 85.1022,
"eval_samples_per_second": 182.428,
"eval_steps_per_second": 22.808,
"step": 6000
},
{
"epoch": 0.84,
"learning_rate": 7.208472974134228e-06,
"loss": 2.311,
"step": 6500
},
{
"epoch": 0.84,
"eval_accuracy": 0.6151508455851109,
"eval_loss": 2.1292011737823486,
"eval_runtime": 79.4597,
"eval_samples_per_second": 195.382,
"eval_steps_per_second": 24.427,
"step": 6500
},
{
"epoch": 0.9,
"learning_rate": 6.993640972759303e-06,
"loss": 2.3014,
"step": 7000
},
{
"epoch": 0.9,
"eval_accuracy": 0.6166432908599604,
"eval_loss": 2.1181797981262207,
"eval_runtime": 87.6275,
"eval_samples_per_second": 177.17,
"eval_steps_per_second": 22.151,
"step": 7000
},
{
"epoch": 0.97,
"learning_rate": 6.7788089713843775e-06,
"loss": 2.2974,
"step": 7500
},
{
"epoch": 0.97,
"eval_accuracy": 0.6169897785349233,
"eval_loss": 2.112070083618164,
"eval_runtime": 83.9336,
"eval_samples_per_second": 184.968,
"eval_steps_per_second": 23.125,
"step": 7500
},
{
"epoch": 1.03,
"learning_rate": 6.563976970009453e-06,
"loss": 2.2866,
"step": 8000
},
{
"epoch": 1.03,
"eval_accuracy": 0.6173022781800038,
"eval_loss": 2.107919454574585,
"eval_runtime": 82.2636,
"eval_samples_per_second": 188.723,
"eval_steps_per_second": 23.595,
"step": 8000
},
{
"epoch": 1.1,
"learning_rate": 6.349574632637278e-06,
"loss": 2.2675,
"step": 8500
},
{
"epoch": 1.1,
"eval_accuracy": 0.6191927234863566,
"eval_loss": 2.0939817428588867,
"eval_runtime": 87.5998,
"eval_samples_per_second": 177.226,
"eval_steps_per_second": 22.158,
"step": 8500
},
{
"epoch": 1.16,
"learning_rate": 6.134742631262354e-06,
"loss": 2.2789,
"step": 9000
},
{
"epoch": 1.16,
"eval_accuracy": 0.6201220093575694,
"eval_loss": 2.088168144226074,
"eval_runtime": 83.772,
"eval_samples_per_second": 185.324,
"eval_steps_per_second": 23.17,
"step": 9000
},
{
"epoch": 1.22,
"learning_rate": 5.919910629887429e-06,
"loss": 2.2684,
"step": 9500
},
{
"epoch": 1.22,
"eval_accuracy": 0.6199849943877651,
"eval_loss": 2.0872652530670166,
"eval_runtime": 87.4418,
"eval_samples_per_second": 177.547,
"eval_steps_per_second": 22.198,
"step": 9500
},
{
"epoch": 1.29,
"learning_rate": 5.705078628512504e-06,
"loss": 2.2608,
"step": 10000
},
{
"epoch": 1.29,
"eval_accuracy": 0.6208952330586832,
"eval_loss": 2.0795998573303223,
"eval_runtime": 86.9343,
"eval_samples_per_second": 178.583,
"eval_steps_per_second": 22.327,
"step": 10000
},
{
"epoch": 1.35,
"learning_rate": 5.490246627137579e-06,
"loss": 2.2478,
"step": 10500
},
{
"epoch": 1.35,
"eval_accuracy": 0.620409766315376,
"eval_loss": 2.082674503326416,
"eval_runtime": 84.0547,
"eval_samples_per_second": 184.701,
"eval_steps_per_second": 23.092,
"step": 10500
},
{
"epoch": 1.42,
"learning_rate": 5.275844289765404e-06,
"loss": 2.2524,
"step": 11000
},
{
"epoch": 1.42,
"eval_accuracy": 0.6214935816878795,
"eval_loss": 2.074056386947632,
"eval_runtime": 87.5237,
"eval_samples_per_second": 177.381,
"eval_steps_per_second": 22.177,
"step": 11000
},
{
"epoch": 1.48,
"learning_rate": 5.061012288390479e-06,
"loss": 2.2502,
"step": 11500
},
{
"epoch": 1.48,
"eval_accuracy": 0.6220323169678965,
"eval_loss": 2.068490505218506,
"eval_runtime": 84.958,
"eval_samples_per_second": 182.737,
"eval_steps_per_second": 22.847,
"step": 11500
},
{
"epoch": 1.55,
"learning_rate": 4.8461802870155545e-06,
"loss": 2.243,
"step": 12000
},
{
"epoch": 1.55,
"eval_accuracy": 0.622761702720804,
"eval_loss": 2.0664761066436768,
"eval_runtime": 79.0021,
"eval_samples_per_second": 196.514,
"eval_steps_per_second": 24.569,
"step": 12000
},
{
"epoch": 1.61,
"learning_rate": 4.631348285640629e-06,
"loss": 2.2417,
"step": 12500
},
{
"epoch": 1.61,
"eval_accuracy": 0.6228723852166125,
"eval_loss": 2.0631983280181885,
"eval_runtime": 87.1566,
"eval_samples_per_second": 178.128,
"eval_steps_per_second": 22.27,
"step": 12500
},
{
"epoch": 1.68,
"learning_rate": 4.416516284265704e-06,
"loss": 2.2398,
"step": 13000
},
{
"epoch": 1.68,
"eval_accuracy": 0.6232123058100858,
"eval_loss": 2.0592522621154785,
"eval_runtime": 83.668,
"eval_samples_per_second": 185.555,
"eval_steps_per_second": 23.199,
"step": 13000
},
{
"epoch": 1.74,
"learning_rate": 4.20168428289078e-06,
"loss": 2.2233,
"step": 13500
},
{
"epoch": 1.74,
"eval_accuracy": 0.6232258668129607,
"eval_loss": 2.060002326965332,
"eval_runtime": 80.0466,
"eval_samples_per_second": 193.95,
"eval_steps_per_second": 24.248,
"step": 13500
},
{
"epoch": 1.8,
"learning_rate": 3.987281945518604e-06,
"loss": 2.2277,
"step": 14000
},
{
"epoch": 1.8,
"eval_accuracy": 0.623606800420627,
"eval_loss": 2.0534963607788086,
"eval_runtime": 87.4565,
"eval_samples_per_second": 177.517,
"eval_steps_per_second": 22.194,
"step": 14000
},
{
"epoch": 1.87,
"learning_rate": 3.77244994414368e-06,
"loss": 2.2344,
"step": 14500
},
{
"epoch": 1.87,
"eval_accuracy": 0.6247527084114421,
"eval_loss": 2.0484962463378906,
"eval_runtime": 83.8183,
"eval_samples_per_second": 185.222,
"eval_steps_per_second": 23.157,
"step": 14500
},
{
"epoch": 1.93,
"learning_rate": 3.5576179427687554e-06,
"loss": 2.2274,
"step": 15000
},
{
"epoch": 1.93,
"eval_accuracy": 0.6244717527399175,
"eval_loss": 2.050738573074341,
"eval_runtime": 87.5865,
"eval_samples_per_second": 177.253,
"eval_steps_per_second": 22.161,
"step": 15000
},
{
"epoch": 2.0,
"learning_rate": 3.34321560539658e-06,
"loss": 2.2212,
"step": 15500
},
{
"epoch": 2.0,
"eval_accuracy": 0.6256074101917349,
"eval_loss": 2.0428130626678467,
"eval_runtime": 86.8032,
"eval_samples_per_second": 178.853,
"eval_steps_per_second": 22.361,
"step": 15500
},
{
"epoch": 2.06,
"learning_rate": 3.1283836040216555e-06,
"loss": 2.214,
"step": 16000
},
{
"epoch": 2.06,
"eval_accuracy": 0.6244417876710062,
"eval_loss": 2.0463979244232178,
"eval_runtime": 84.1399,
"eval_samples_per_second": 184.514,
"eval_steps_per_second": 23.069,
"step": 16000
},
{
"epoch": 2.13,
"learning_rate": 2.9135516026467303e-06,
"loss": 2.2104,
"step": 16500
},
{
"epoch": 2.13,
"eval_accuracy": 0.6249873550076295,
"eval_loss": 2.0476861000061035,
"eval_runtime": 87.5417,
"eval_samples_per_second": 177.344,
"eval_steps_per_second": 22.172,
"step": 16500
},
{
"epoch": 2.19,
"learning_rate": 2.698719601271806e-06,
"loss": 2.2185,
"step": 17000
},
{
"epoch": 2.19,
"eval_accuracy": 0.6257313721221357,
"eval_loss": 2.039674758911133,
"eval_runtime": 84.986,
"eval_samples_per_second": 182.677,
"eval_steps_per_second": 22.839,
"step": 17000
},
{
"epoch": 2.26,
"learning_rate": 2.483887599896881e-06,
"loss": 2.2157,
"step": 17500
},
{
"epoch": 2.26,
"eval_accuracy": 0.6257406865679764,
"eval_loss": 2.041879177093506,
"eval_runtime": 79.7413,
"eval_samples_per_second": 194.692,
"eval_steps_per_second": 24.341,
"step": 17500
},
{
"epoch": 2.32,
"learning_rate": 2.2690555985219558e-06,
"loss": 2.2128,
"step": 18000
},
{
"epoch": 2.32,
"eval_accuracy": 0.6254893845927666,
"eval_loss": 2.043928623199463,
"eval_runtime": 87.45,
"eval_samples_per_second": 177.53,
"eval_steps_per_second": 22.196,
"step": 18000
},
{
"epoch": 2.38,
"learning_rate": 2.054223597147031e-06,
"loss": 2.2154,
"step": 18500
},
{
"epoch": 2.38,
"eval_accuracy": 0.6259225237275015,
"eval_loss": 2.037231683731079,
"eval_runtime": 83.6819,
"eval_samples_per_second": 185.524,
"eval_steps_per_second": 23.195,
"step": 18500
},
{
"epoch": 2.45,
"learning_rate": 1.8393915957721066e-06,
"loss": 2.2099,
"step": 19000
},
{
"epoch": 2.45,
"eval_accuracy": 0.62631184758297,
"eval_loss": 2.0336806774139404,
"eval_runtime": 81.3506,
"eval_samples_per_second": 190.841,
"eval_steps_per_second": 23.86,
"step": 19000
},
{
"epoch": 2.51,
"learning_rate": 1.6245595943971814e-06,
"loss": 2.2045,
"step": 19500
},
{
"epoch": 2.51,
"eval_accuracy": 0.6258799592390727,
"eval_loss": 2.039562225341797,
"eval_runtime": 87.4501,
"eval_samples_per_second": 177.53,
"eval_steps_per_second": 22.196,
"step": 19500
},
{
"epoch": 2.58,
"learning_rate": 1.4097275930222567e-06,
"loss": 2.2138,
"step": 20000
},
{
"epoch": 2.58,
"eval_accuracy": 0.6261649440028011,
"eval_loss": 2.0390186309814453,
"eval_runtime": 83.8434,
"eval_samples_per_second": 185.167,
"eval_steps_per_second": 23.15,
"step": 20000
},
{
"epoch": 2.64,
"learning_rate": 1.194895591647332e-06,
"loss": 2.2103,
"step": 20500
},
{
"epoch": 2.64,
"eval_accuracy": 0.6262993215315168,
"eval_loss": 2.03385329246521,
"eval_runtime": 87.3376,
"eval_samples_per_second": 177.759,
"eval_steps_per_second": 22.224,
"step": 20500
},
{
"epoch": 2.64,
"step": 20500,
"total_flos": 1.0082485751267328e+16,
"train_loss": 2.351401915015244,
"train_runtime": 7059.8447,
"train_samples_per_second": 52.745,
"train_steps_per_second": 3.297
}
],
"logging_steps": 500,
"max_steps": 23274,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.0082485751267328e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}