{ "best_metric": 2.0336806774139404, "best_model_checkpoint": "finetuning/output/electra-adapter-finetuned_xe_ey_fae/checkpoint-19000", "epoch": 2.642433616911575, "eval_steps": 500, "global_step": 20500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 9.785167998625076e-06, "loss": 3.9488, "step": 500 }, { "epoch": 0.06, "eval_accuracy": 0.5508950432882589, "eval_loss": 3.1499977111816406, "eval_runtime": 85.1217, "eval_samples_per_second": 182.386, "eval_steps_per_second": 22.803, "step": 500 }, { "epoch": 0.13, "learning_rate": 9.57033599725015e-06, "loss": 2.942, "step": 1000 }, { "epoch": 0.13, "eval_accuracy": 0.5680209177510359, "eval_loss": 2.584392547607422, "eval_runtime": 79.4716, "eval_samples_per_second": 195.353, "eval_steps_per_second": 24.424, "step": 1000 }, { "epoch": 0.19, "learning_rate": 9.355503995875225e-06, "loss": 2.6751, "step": 1500 }, { "epoch": 0.19, "eval_accuracy": 0.578970986434046, "eval_loss": 2.444335699081421, "eval_runtime": 87.5675, "eval_samples_per_second": 177.292, "eval_steps_per_second": 22.166, "step": 1500 }, { "epoch": 0.26, "learning_rate": 9.140671994500302e-06, "loss": 2.582, "step": 2000 }, { "epoch": 0.26, "eval_accuracy": 0.5868782143731802, "eval_loss": 2.3700673580169678, "eval_runtime": 83.7436, "eval_samples_per_second": 185.387, "eval_steps_per_second": 23.178, "step": 2000 }, { "epoch": 0.32, "learning_rate": 8.926269657128126e-06, "loss": 2.5267, "step": 2500 }, { "epoch": 0.32, "eval_accuracy": 0.5937291646823507, "eval_loss": 2.309689998626709, "eval_runtime": 81.3517, "eval_samples_per_second": 190.838, "eval_steps_per_second": 23.859, "step": 2500 }, { "epoch": 0.39, "learning_rate": 8.711437655753203e-06, "loss": 2.4722, "step": 3000 }, { "epoch": 0.39, "eval_accuracy": 0.5985969269659629, "eval_loss": 2.2695114612579346, "eval_runtime": 87.4381, "eval_samples_per_second": 177.554, "eval_steps_per_second": 22.199, "step": 3000 }, { "epoch": 0.45, "learning_rate": 8.497035318381027e-06, "loss": 2.4289, "step": 3500 }, { "epoch": 0.45, "eval_accuracy": 0.602404170197503, "eval_loss": 2.2328779697418213, "eval_runtime": 83.7759, "eval_samples_per_second": 185.316, "eval_steps_per_second": 23.169, "step": 3500 }, { "epoch": 0.52, "learning_rate": 8.282203317006102e-06, "loss": 2.404, "step": 4000 }, { "epoch": 0.52, "eval_accuracy": 0.6055254061674608, "eval_loss": 2.206317901611328, "eval_runtime": 87.3965, "eval_samples_per_second": 177.639, "eval_steps_per_second": 22.209, "step": 4000 }, { "epoch": 0.58, "learning_rate": 8.067371315631177e-06, "loss": 2.3826, "step": 4500 }, { "epoch": 0.58, "eval_accuracy": 0.6086694296803393, "eval_loss": 2.183983087539673, "eval_runtime": 87.0314, "eval_samples_per_second": 178.384, "eval_steps_per_second": 22.302, "step": 4500 }, { "epoch": 0.64, "learning_rate": 7.852539314256252e-06, "loss": 2.3633, "step": 5000 }, { "epoch": 0.64, "eval_accuracy": 0.6108753723178051, "eval_loss": 2.1645586490631104, "eval_runtime": 83.9383, "eval_samples_per_second": 184.957, "eval_steps_per_second": 23.124, "step": 5000 }, { "epoch": 0.71, "learning_rate": 7.637707312881327e-06, "loss": 2.3425, "step": 5500 }, { "epoch": 0.71, "eval_accuracy": 0.6121162378522405, "eval_loss": 2.155695676803589, "eval_runtime": 87.4417, "eval_samples_per_second": 177.547, "eval_steps_per_second": 22.198, "step": 5500 }, { "epoch": 0.77, "learning_rate": 7.4228753115064025e-06, "loss": 2.333, "step": 6000 }, { "epoch": 0.77, "eval_accuracy": 0.6140775893820937, "eval_loss": 2.1349785327911377, "eval_runtime": 85.1022, "eval_samples_per_second": 182.428, "eval_steps_per_second": 22.808, "step": 6000 }, { "epoch": 0.84, "learning_rate": 7.208472974134228e-06, "loss": 2.311, "step": 6500 }, { "epoch": 0.84, "eval_accuracy": 0.6151508455851109, "eval_loss": 2.1292011737823486, "eval_runtime": 79.4597, "eval_samples_per_second": 195.382, "eval_steps_per_second": 24.427, "step": 6500 }, { "epoch": 0.9, "learning_rate": 6.993640972759303e-06, "loss": 2.3014, "step": 7000 }, { "epoch": 0.9, "eval_accuracy": 0.6166432908599604, "eval_loss": 2.1181797981262207, "eval_runtime": 87.6275, "eval_samples_per_second": 177.17, "eval_steps_per_second": 22.151, "step": 7000 }, { "epoch": 0.97, "learning_rate": 6.7788089713843775e-06, "loss": 2.2974, "step": 7500 }, { "epoch": 0.97, "eval_accuracy": 0.6169897785349233, "eval_loss": 2.112070083618164, "eval_runtime": 83.9336, "eval_samples_per_second": 184.968, "eval_steps_per_second": 23.125, "step": 7500 }, { "epoch": 1.03, "learning_rate": 6.563976970009453e-06, "loss": 2.2866, "step": 8000 }, { "epoch": 1.03, "eval_accuracy": 0.6173022781800038, "eval_loss": 2.107919454574585, "eval_runtime": 82.2636, "eval_samples_per_second": 188.723, "eval_steps_per_second": 23.595, "step": 8000 }, { "epoch": 1.1, "learning_rate": 6.349574632637278e-06, "loss": 2.2675, "step": 8500 }, { "epoch": 1.1, "eval_accuracy": 0.6191927234863566, "eval_loss": 2.0939817428588867, "eval_runtime": 87.5998, "eval_samples_per_second": 177.226, "eval_steps_per_second": 22.158, "step": 8500 }, { "epoch": 1.16, "learning_rate": 6.134742631262354e-06, "loss": 2.2789, "step": 9000 }, { "epoch": 1.16, "eval_accuracy": 0.6201220093575694, "eval_loss": 2.088168144226074, "eval_runtime": 83.772, "eval_samples_per_second": 185.324, "eval_steps_per_second": 23.17, "step": 9000 }, { "epoch": 1.22, "learning_rate": 5.919910629887429e-06, "loss": 2.2684, "step": 9500 }, { "epoch": 1.22, "eval_accuracy": 0.6199849943877651, "eval_loss": 2.0872652530670166, "eval_runtime": 87.4418, "eval_samples_per_second": 177.547, "eval_steps_per_second": 22.198, "step": 9500 }, { "epoch": 1.29, "learning_rate": 5.705078628512504e-06, "loss": 2.2608, "step": 10000 }, { "epoch": 1.29, "eval_accuracy": 0.6208952330586832, "eval_loss": 2.0795998573303223, "eval_runtime": 86.9343, "eval_samples_per_second": 178.583, "eval_steps_per_second": 22.327, "step": 10000 }, { "epoch": 1.35, "learning_rate": 5.490246627137579e-06, "loss": 2.2478, "step": 10500 }, { "epoch": 1.35, "eval_accuracy": 0.620409766315376, "eval_loss": 2.082674503326416, "eval_runtime": 84.0547, "eval_samples_per_second": 184.701, "eval_steps_per_second": 23.092, "step": 10500 }, { "epoch": 1.42, "learning_rate": 5.275844289765404e-06, "loss": 2.2524, "step": 11000 }, { "epoch": 1.42, "eval_accuracy": 0.6214935816878795, "eval_loss": 2.074056386947632, "eval_runtime": 87.5237, "eval_samples_per_second": 177.381, "eval_steps_per_second": 22.177, "step": 11000 }, { "epoch": 1.48, "learning_rate": 5.061012288390479e-06, "loss": 2.2502, "step": 11500 }, { "epoch": 1.48, "eval_accuracy": 0.6220323169678965, "eval_loss": 2.068490505218506, "eval_runtime": 84.958, "eval_samples_per_second": 182.737, "eval_steps_per_second": 22.847, "step": 11500 }, { "epoch": 1.55, "learning_rate": 4.8461802870155545e-06, "loss": 2.243, "step": 12000 }, { "epoch": 1.55, "eval_accuracy": 0.622761702720804, "eval_loss": 2.0664761066436768, "eval_runtime": 79.0021, "eval_samples_per_second": 196.514, "eval_steps_per_second": 24.569, "step": 12000 }, { "epoch": 1.61, "learning_rate": 4.631348285640629e-06, "loss": 2.2417, "step": 12500 }, { "epoch": 1.61, "eval_accuracy": 0.6228723852166125, "eval_loss": 2.0631983280181885, "eval_runtime": 87.1566, "eval_samples_per_second": 178.128, "eval_steps_per_second": 22.27, "step": 12500 }, { "epoch": 1.68, "learning_rate": 4.416516284265704e-06, "loss": 2.2398, "step": 13000 }, { "epoch": 1.68, "eval_accuracy": 0.6232123058100858, "eval_loss": 2.0592522621154785, "eval_runtime": 83.668, "eval_samples_per_second": 185.555, "eval_steps_per_second": 23.199, "step": 13000 }, { "epoch": 1.74, "learning_rate": 4.20168428289078e-06, "loss": 2.2233, "step": 13500 }, { "epoch": 1.74, "eval_accuracy": 0.6232258668129607, "eval_loss": 2.060002326965332, "eval_runtime": 80.0466, "eval_samples_per_second": 193.95, "eval_steps_per_second": 24.248, "step": 13500 }, { "epoch": 1.8, "learning_rate": 3.987281945518604e-06, "loss": 2.2277, "step": 14000 }, { "epoch": 1.8, "eval_accuracy": 0.623606800420627, "eval_loss": 2.0534963607788086, "eval_runtime": 87.4565, "eval_samples_per_second": 177.517, "eval_steps_per_second": 22.194, "step": 14000 }, { "epoch": 1.87, "learning_rate": 3.77244994414368e-06, "loss": 2.2344, "step": 14500 }, { "epoch": 1.87, "eval_accuracy": 0.6247527084114421, "eval_loss": 2.0484962463378906, "eval_runtime": 83.8183, "eval_samples_per_second": 185.222, "eval_steps_per_second": 23.157, "step": 14500 }, { "epoch": 1.93, "learning_rate": 3.5576179427687554e-06, "loss": 2.2274, "step": 15000 }, { "epoch": 1.93, "eval_accuracy": 0.6244717527399175, "eval_loss": 2.050738573074341, "eval_runtime": 87.5865, "eval_samples_per_second": 177.253, "eval_steps_per_second": 22.161, "step": 15000 }, { "epoch": 2.0, "learning_rate": 3.34321560539658e-06, "loss": 2.2212, "step": 15500 }, { "epoch": 2.0, "eval_accuracy": 0.6256074101917349, "eval_loss": 2.0428130626678467, "eval_runtime": 86.8032, "eval_samples_per_second": 178.853, "eval_steps_per_second": 22.361, "step": 15500 }, { "epoch": 2.06, "learning_rate": 3.1283836040216555e-06, "loss": 2.214, "step": 16000 }, { "epoch": 2.06, "eval_accuracy": 0.6244417876710062, "eval_loss": 2.0463979244232178, "eval_runtime": 84.1399, "eval_samples_per_second": 184.514, "eval_steps_per_second": 23.069, "step": 16000 }, { "epoch": 2.13, "learning_rate": 2.9135516026467303e-06, "loss": 2.2104, "step": 16500 }, { "epoch": 2.13, "eval_accuracy": 0.6249873550076295, "eval_loss": 2.0476861000061035, "eval_runtime": 87.5417, "eval_samples_per_second": 177.344, "eval_steps_per_second": 22.172, "step": 16500 }, { "epoch": 2.19, "learning_rate": 2.698719601271806e-06, "loss": 2.2185, "step": 17000 }, { "epoch": 2.19, "eval_accuracy": 0.6257313721221357, "eval_loss": 2.039674758911133, "eval_runtime": 84.986, "eval_samples_per_second": 182.677, "eval_steps_per_second": 22.839, "step": 17000 }, { "epoch": 2.26, "learning_rate": 2.483887599896881e-06, "loss": 2.2157, "step": 17500 }, { "epoch": 2.26, "eval_accuracy": 0.6257406865679764, "eval_loss": 2.041879177093506, "eval_runtime": 79.7413, "eval_samples_per_second": 194.692, "eval_steps_per_second": 24.341, "step": 17500 }, { "epoch": 2.32, "learning_rate": 2.2690555985219558e-06, "loss": 2.2128, "step": 18000 }, { "epoch": 2.32, "eval_accuracy": 0.6254893845927666, "eval_loss": 2.043928623199463, "eval_runtime": 87.45, "eval_samples_per_second": 177.53, "eval_steps_per_second": 22.196, "step": 18000 }, { "epoch": 2.38, "learning_rate": 2.054223597147031e-06, "loss": 2.2154, "step": 18500 }, { "epoch": 2.38, "eval_accuracy": 0.6259225237275015, "eval_loss": 2.037231683731079, "eval_runtime": 83.6819, "eval_samples_per_second": 185.524, "eval_steps_per_second": 23.195, "step": 18500 }, { "epoch": 2.45, "learning_rate": 1.8393915957721066e-06, "loss": 2.2099, "step": 19000 }, { "epoch": 2.45, "eval_accuracy": 0.62631184758297, "eval_loss": 2.0336806774139404, "eval_runtime": 81.3506, "eval_samples_per_second": 190.841, "eval_steps_per_second": 23.86, "step": 19000 }, { "epoch": 2.51, "learning_rate": 1.6245595943971814e-06, "loss": 2.2045, "step": 19500 }, { "epoch": 2.51, "eval_accuracy": 0.6258799592390727, "eval_loss": 2.039562225341797, "eval_runtime": 87.4501, "eval_samples_per_second": 177.53, "eval_steps_per_second": 22.196, "step": 19500 }, { "epoch": 2.58, "learning_rate": 1.4097275930222567e-06, "loss": 2.2138, "step": 20000 }, { "epoch": 2.58, "eval_accuracy": 0.6261649440028011, "eval_loss": 2.0390186309814453, "eval_runtime": 83.8434, "eval_samples_per_second": 185.167, "eval_steps_per_second": 23.15, "step": 20000 }, { "epoch": 2.64, "learning_rate": 1.194895591647332e-06, "loss": 2.2103, "step": 20500 }, { "epoch": 2.64, "eval_accuracy": 0.6262993215315168, "eval_loss": 2.03385329246521, "eval_runtime": 87.3376, "eval_samples_per_second": 177.759, "eval_steps_per_second": 22.224, "step": 20500 }, { "epoch": 2.64, "step": 20500, "total_flos": 1.0082485751267328e+16, "train_loss": 2.351401915015244, "train_runtime": 7059.8447, "train_samples_per_second": 52.745, "train_steps_per_second": 3.297 } ], "logging_steps": 500, "max_steps": 23274, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.0082485751267328e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }