{ "best_metric": 1.122239351272583, "best_model_checkpoint": "./res_1/checkpoint-8500", "epoch": 3.72168284789644, "global_step": 11500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2.5e-05, "loss": 4.4764, "step": 250 }, { "epoch": 0.08, "eval_accuracy": 0.2770019218449712, "eval_f1_score": 0.15899206409534755, "eval_loss": 3.3759119510650635, "eval_runtime": 78.6208, "eval_samples_per_second": 198.548, "eval_steps_per_second": 2.073, "step": 250 }, { "epoch": 0.16, "learning_rate": 5e-05, "loss": 2.7621, "step": 500 }, { "epoch": 0.16, "eval_accuracy": 0.45624599615631006, "eval_f1_score": 0.37938588394970946, "eval_loss": 2.22213077545166, "eval_runtime": 79.1544, "eval_samples_per_second": 197.21, "eval_steps_per_second": 2.059, "step": 500 }, { "epoch": 0.24, "learning_rate": 4.89460370994941e-05, "loss": 2.0369, "step": 750 }, { "epoch": 0.24, "eval_accuracy": 0.5180653427290198, "eval_f1_score": 0.4651383493082148, "eval_loss": 1.8033452033996582, "eval_runtime": 78.4993, "eval_samples_per_second": 198.855, "eval_steps_per_second": 2.076, "step": 750 }, { "epoch": 0.32, "learning_rate": 4.7892074198988196e-05, "loss": 1.7846, "step": 1000 }, { "epoch": 0.32, "eval_accuracy": 0.5547085201793722, "eval_f1_score": 0.5145917984904441, "eval_loss": 1.6128593683242798, "eval_runtime": 79.1904, "eval_samples_per_second": 197.12, "eval_steps_per_second": 2.058, "step": 1000 }, { "epoch": 0.4, "learning_rate": 4.683811129848229e-05, "loss": 1.5885, "step": 1250 }, { "epoch": 0.4, "eval_accuracy": 0.5733504163997437, "eval_f1_score": 0.5403077333842975, "eval_loss": 1.5020724534988403, "eval_runtime": 79.2228, "eval_samples_per_second": 197.039, "eval_steps_per_second": 2.057, "step": 1250 }, { "epoch": 0.49, "learning_rate": 4.5784148397976396e-05, "loss": 1.5474, "step": 1500 }, { "epoch": 0.49, "eval_accuracy": 0.5885970531710442, "eval_f1_score": 0.5598433530383174, "eval_loss": 1.4353328943252563, "eval_runtime": 79.3289, "eval_samples_per_second": 196.776, "eval_steps_per_second": 2.055, "step": 1500 }, { "epoch": 0.57, "learning_rate": 4.4730185497470486e-05, "loss": 1.4754, "step": 1750 }, { "epoch": 0.57, "eval_accuracy": 0.5910954516335682, "eval_f1_score": 0.567613249026739, "eval_loss": 1.4085466861724854, "eval_runtime": 79.2487, "eval_samples_per_second": 196.975, "eval_steps_per_second": 2.057, "step": 1750 }, { "epoch": 0.65, "learning_rate": 4.367622259696459e-05, "loss": 1.3951, "step": 2000 }, { "epoch": 0.65, "eval_accuracy": 0.6080717488789238, "eval_f1_score": 0.5881168924607102, "eval_loss": 1.3425586223602295, "eval_runtime": 79.2258, "eval_samples_per_second": 197.032, "eval_steps_per_second": 2.057, "step": 2000 }, { "epoch": 0.73, "learning_rate": 4.262225969645869e-05, "loss": 1.3711, "step": 2250 }, { "epoch": 0.73, "eval_accuracy": 0.6153747597693786, "eval_f1_score": 0.5932326664510665, "eval_loss": 1.3140783309936523, "eval_runtime": 79.1766, "eval_samples_per_second": 197.154, "eval_steps_per_second": 2.059, "step": 2250 }, { "epoch": 0.81, "learning_rate": 4.1568296795952783e-05, "loss": 1.3487, "step": 2500 }, { "epoch": 0.81, "eval_accuracy": 0.6163997437540039, "eval_f1_score": 0.5956180167874238, "eval_loss": 1.3029619455337524, "eval_runtime": 79.2987, "eval_samples_per_second": 196.851, "eval_steps_per_second": 2.056, "step": 2500 }, { "epoch": 0.89, "learning_rate": 4.051433389544688e-05, "loss": 1.325, "step": 2750 }, { "epoch": 0.89, "eval_accuracy": 0.6240230621396541, "eval_f1_score": 0.6027158431466506, "eval_loss": 1.2634259462356567, "eval_runtime": 79.2608, "eval_samples_per_second": 196.945, "eval_steps_per_second": 2.057, "step": 2750 }, { "epoch": 0.97, "learning_rate": 3.9460370994940984e-05, "loss": 1.307, "step": 3000 }, { "epoch": 0.97, "eval_accuracy": 0.6235105701473415, "eval_f1_score": 0.6034162405543351, "eval_loss": 1.2556581497192383, "eval_runtime": 79.2153, "eval_samples_per_second": 197.058, "eval_steps_per_second": 2.058, "step": 3000 }, { "epoch": 1.05, "learning_rate": 3.8406408094435074e-05, "loss": 1.2019, "step": 3250 }, { "epoch": 1.05, "eval_accuracy": 0.6322869955156951, "eval_f1_score": 0.6147432260064974, "eval_loss": 1.227844476699829, "eval_runtime": 79.4547, "eval_samples_per_second": 196.464, "eval_steps_per_second": 2.051, "step": 3250 }, { "epoch": 1.13, "learning_rate": 3.735244519392918e-05, "loss": 1.1366, "step": 3500 }, { "epoch": 1.13, "eval_accuracy": 0.6326713645099296, "eval_f1_score": 0.6163567005428549, "eval_loss": 1.2262649536132812, "eval_runtime": 79.2235, "eval_samples_per_second": 197.037, "eval_steps_per_second": 2.057, "step": 3500 }, { "epoch": 1.21, "learning_rate": 3.6298482293423274e-05, "loss": 1.1643, "step": 3750 }, { "epoch": 1.21, "eval_accuracy": 0.6368994234465086, "eval_f1_score": 0.6215204096974251, "eval_loss": 1.2120453119277954, "eval_runtime": 79.1999, "eval_samples_per_second": 197.096, "eval_steps_per_second": 2.058, "step": 3750 }, { "epoch": 1.29, "learning_rate": 3.524451939291737e-05, "loss": 1.1226, "step": 4000 }, { "epoch": 1.29, "eval_accuracy": 0.6377322229340167, "eval_f1_score": 0.6225832875038563, "eval_loss": 1.2013684511184692, "eval_runtime": 79.3501, "eval_samples_per_second": 196.723, "eval_steps_per_second": 2.054, "step": 4000 }, { "epoch": 1.38, "learning_rate": 3.419055649241147e-05, "loss": 1.1193, "step": 4250 }, { "epoch": 1.38, "eval_accuracy": 0.6332479180012812, "eval_f1_score": 0.6195484695149066, "eval_loss": 1.2123792171478271, "eval_runtime": 79.3987, "eval_samples_per_second": 196.603, "eval_steps_per_second": 2.053, "step": 4250 }, { "epoch": 1.46, "learning_rate": 3.313659359190557e-05, "loss": 1.1093, "step": 4500 }, { "epoch": 1.46, "eval_accuracy": 0.6402306213965406, "eval_f1_score": 0.6254312551097587, "eval_loss": 1.1897130012512207, "eval_runtime": 79.5616, "eval_samples_per_second": 196.2, "eval_steps_per_second": 2.049, "step": 4500 }, { "epoch": 1.54, "learning_rate": 3.208263069139966e-05, "loss": 1.0848, "step": 4750 }, { "epoch": 1.54, "eval_accuracy": 0.6440102498398462, "eval_f1_score": 0.6291617598687145, "eval_loss": 1.178646445274353, "eval_runtime": 79.2558, "eval_samples_per_second": 196.957, "eval_steps_per_second": 2.057, "step": 4750 }, { "epoch": 1.62, "learning_rate": 3.1028667790893765e-05, "loss": 1.1105, "step": 5000 }, { "epoch": 1.62, "eval_accuracy": 0.649583600256246, "eval_f1_score": 0.6335161931693469, "eval_loss": 1.1682264804840088, "eval_runtime": 79.3313, "eval_samples_per_second": 196.77, "eval_steps_per_second": 2.055, "step": 5000 }, { "epoch": 1.7, "learning_rate": 2.997470489038786e-05, "loss": 1.1014, "step": 5250 }, { "epoch": 1.7, "eval_accuracy": 0.6516335682254965, "eval_f1_score": 0.6377925627710681, "eval_loss": 1.156283974647522, "eval_runtime": 79.534, "eval_samples_per_second": 196.268, "eval_steps_per_second": 2.049, "step": 5250 }, { "epoch": 1.78, "learning_rate": 2.8920741989881955e-05, "loss": 1.0828, "step": 5500 }, { "epoch": 1.78, "eval_accuracy": 0.6459961563100577, "eval_f1_score": 0.6313286940706383, "eval_loss": 1.1570398807525635, "eval_runtime": 79.5476, "eval_samples_per_second": 196.235, "eval_steps_per_second": 2.049, "step": 5500 }, { "epoch": 1.86, "learning_rate": 2.7866779089376055e-05, "loss": 1.0803, "step": 5750 }, { "epoch": 1.86, "eval_accuracy": 0.6530429212043561, "eval_f1_score": 0.6404845558613215, "eval_loss": 1.146980881690979, "eval_runtime": 79.2229, "eval_samples_per_second": 197.039, "eval_steps_per_second": 2.057, "step": 5750 }, { "epoch": 1.94, "learning_rate": 2.6812816188870155e-05, "loss": 1.0799, "step": 6000 }, { "epoch": 1.94, "eval_accuracy": 0.6516976297245356, "eval_f1_score": 0.638943219006341, "eval_loss": 1.1435405015945435, "eval_runtime": 79.322, "eval_samples_per_second": 196.793, "eval_steps_per_second": 2.055, "step": 6000 }, { "epoch": 2.02, "learning_rate": 2.575885328836425e-05, "loss": 1.0172, "step": 6250 }, { "epoch": 2.02, "eval_accuracy": 0.647982062780269, "eval_f1_score": 0.6342691459320453, "eval_loss": 1.157272219657898, "eval_runtime": 79.3295, "eval_samples_per_second": 196.774, "eval_steps_per_second": 2.055, "step": 6250 }, { "epoch": 2.1, "learning_rate": 2.470489038785835e-05, "loss": 0.8986, "step": 6500 }, { "epoch": 2.1, "eval_accuracy": 0.6488789237668161, "eval_f1_score": 0.6382130933006974, "eval_loss": 1.1634544134140015, "eval_runtime": 79.298, "eval_samples_per_second": 196.852, "eval_steps_per_second": 2.056, "step": 6500 }, { "epoch": 2.18, "learning_rate": 2.3650927487352446e-05, "loss": 0.8871, "step": 6750 }, { "epoch": 2.18, "eval_accuracy": 0.6540679051889814, "eval_f1_score": 0.6404856756767366, "eval_loss": 1.1588941812515259, "eval_runtime": 79.2578, "eval_samples_per_second": 196.952, "eval_steps_per_second": 2.057, "step": 6750 }, { "epoch": 2.27, "learning_rate": 2.2596964586846546e-05, "loss": 0.9066, "step": 7000 }, { "epoch": 2.27, "eval_accuracy": 0.6515695067264574, "eval_f1_score": 0.6402759354718203, "eval_loss": 1.146422266960144, "eval_runtime": 79.5354, "eval_samples_per_second": 196.265, "eval_steps_per_second": 2.049, "step": 7000 }, { "epoch": 2.35, "learning_rate": 2.1543001686340643e-05, "loss": 0.897, "step": 7250 }, { "epoch": 2.35, "eval_accuracy": 0.650352338244715, "eval_f1_score": 0.6389635569093923, "eval_loss": 1.1561025381088257, "eval_runtime": 79.2894, "eval_samples_per_second": 196.874, "eval_steps_per_second": 2.056, "step": 7250 }, { "epoch": 2.43, "learning_rate": 2.048903878583474e-05, "loss": 0.8988, "step": 7500 }, { "epoch": 2.43, "eval_accuracy": 0.6547725816784112, "eval_f1_score": 0.6425914825215071, "eval_loss": 1.1453956365585327, "eval_runtime": 79.2343, "eval_samples_per_second": 197.011, "eval_steps_per_second": 2.057, "step": 7500 }, { "epoch": 2.51, "learning_rate": 1.943507588532884e-05, "loss": 0.9023, "step": 7750 }, { "epoch": 2.51, "eval_accuracy": 0.6542600896860986, "eval_f1_score": 0.6438243504000527, "eval_loss": 1.1429415941238403, "eval_runtime": 79.2597, "eval_samples_per_second": 196.947, "eval_steps_per_second": 2.057, "step": 7750 }, { "epoch": 2.59, "learning_rate": 1.8381112984822936e-05, "loss": 0.8791, "step": 8000 }, { "epoch": 2.59, "eval_accuracy": 0.6572069186418962, "eval_f1_score": 0.647070357907258, "eval_loss": 1.1421723365783691, "eval_runtime": 79.5074, "eval_samples_per_second": 196.334, "eval_steps_per_second": 2.05, "step": 8000 }, { "epoch": 2.67, "learning_rate": 1.7327150084317033e-05, "loss": 0.8971, "step": 8250 }, { "epoch": 2.67, "eval_accuracy": 0.6581678411274824, "eval_f1_score": 0.6474305978141667, "eval_loss": 1.1367805004119873, "eval_runtime": 79.1975, "eval_samples_per_second": 197.102, "eval_steps_per_second": 2.058, "step": 8250 }, { "epoch": 2.75, "learning_rate": 1.627318718381113e-05, "loss": 0.8885, "step": 8500 }, { "epoch": 2.75, "eval_accuracy": 0.6608584240871236, "eval_f1_score": 0.6487602589453412, "eval_loss": 1.122239351272583, "eval_runtime": 79.2042, "eval_samples_per_second": 197.085, "eval_steps_per_second": 2.058, "step": 8500 }, { "epoch": 2.83, "learning_rate": 1.5219224283305228e-05, "loss": 0.8745, "step": 8750 }, { "epoch": 2.83, "eval_accuracy": 0.6579756566303652, "eval_f1_score": 0.6483741964856444, "eval_loss": 1.1291719675064087, "eval_runtime": 79.4248, "eval_samples_per_second": 196.538, "eval_steps_per_second": 2.052, "step": 8750 }, { "epoch": 2.91, "learning_rate": 1.4165261382799327e-05, "loss": 0.8837, "step": 9000 }, { "epoch": 2.91, "eval_accuracy": 0.6574631646380525, "eval_f1_score": 0.64725568770933, "eval_loss": 1.1298834085464478, "eval_runtime": 79.278, "eval_samples_per_second": 196.902, "eval_steps_per_second": 2.056, "step": 9000 }, { "epoch": 2.99, "learning_rate": 1.3111298482293424e-05, "loss": 0.8604, "step": 9250 }, { "epoch": 2.99, "eval_accuracy": 0.6556694426649584, "eval_f1_score": 0.6459143709441728, "eval_loss": 1.1267211437225342, "eval_runtime": 79.3015, "eval_samples_per_second": 196.844, "eval_steps_per_second": 2.055, "step": 9250 }, { "epoch": 3.07, "learning_rate": 1.205733558178752e-05, "loss": 0.754, "step": 9500 }, { "epoch": 3.07, "eval_accuracy": 0.657847533632287, "eval_f1_score": 0.6475765832075672, "eval_loss": 1.1378750801086426, "eval_runtime": 79.3879, "eval_samples_per_second": 196.63, "eval_steps_per_second": 2.053, "step": 9500 }, { "epoch": 3.16, "learning_rate": 1.1003372681281619e-05, "loss": 0.7371, "step": 9750 }, { "epoch": 3.16, "eval_accuracy": 0.658744394618834, "eval_f1_score": 0.648976579094328, "eval_loss": 1.1473287343978882, "eval_runtime": 79.2492, "eval_samples_per_second": 196.974, "eval_steps_per_second": 2.057, "step": 9750 }, { "epoch": 3.24, "learning_rate": 9.949409780775717e-06, "loss": 0.7418, "step": 10000 }, { "epoch": 3.24, "eval_accuracy": 0.6577194106342088, "eval_f1_score": 0.6481259620512635, "eval_loss": 1.1429522037506104, "eval_runtime": 79.2653, "eval_samples_per_second": 196.934, "eval_steps_per_second": 2.056, "step": 10000 }, { "epoch": 3.32, "learning_rate": 8.895446880269814e-06, "loss": 0.7478, "step": 10250 }, { "epoch": 3.32, "eval_accuracy": 0.6581678411274824, "eval_f1_score": 0.6480298727343113, "eval_loss": 1.147200107574463, "eval_runtime": 79.3533, "eval_samples_per_second": 196.715, "eval_steps_per_second": 2.054, "step": 10250 }, { "epoch": 3.4, "learning_rate": 7.841483979763913e-06, "loss": 0.7214, "step": 10500 }, { "epoch": 3.4, "eval_accuracy": 0.6593209481101858, "eval_f1_score": 0.6487748216018238, "eval_loss": 1.1483687162399292, "eval_runtime": 79.2151, "eval_samples_per_second": 197.058, "eval_steps_per_second": 2.058, "step": 10500 }, { "epoch": 3.48, "learning_rate": 6.787521079258011e-06, "loss": 0.7378, "step": 10750 }, { "epoch": 3.48, "eval_accuracy": 0.6613709160794362, "eval_f1_score": 0.6515662944938931, "eval_loss": 1.1435444355010986, "eval_runtime": 79.1971, "eval_samples_per_second": 197.103, "eval_steps_per_second": 2.058, "step": 10750 }, { "epoch": 3.56, "learning_rate": 5.733558178752108e-06, "loss": 0.746, "step": 11000 }, { "epoch": 3.56, "eval_accuracy": 0.6582959641255606, "eval_f1_score": 0.649775469900882, "eval_loss": 1.142891764640808, "eval_runtime": 79.304, "eval_samples_per_second": 196.838, "eval_steps_per_second": 2.055, "step": 11000 }, { "epoch": 3.64, "learning_rate": 4.6795952782462055e-06, "loss": 0.7355, "step": 11250 }, { "epoch": 3.64, "eval_accuracy": 0.6614349775784754, "eval_f1_score": 0.6523276061496733, "eval_loss": 1.1420927047729492, "eval_runtime": 79.4328, "eval_samples_per_second": 196.518, "eval_steps_per_second": 2.052, "step": 11250 }, { "epoch": 3.72, "learning_rate": 3.625632377740304e-06, "loss": 0.7207, "step": 11500 }, { "epoch": 3.72, "eval_accuracy": 0.6618193465727098, "eval_f1_score": 0.6531447427234552, "eval_loss": 1.1392197608947754, "eval_runtime": 79.3578, "eval_samples_per_second": 196.704, "eval_steps_per_second": 2.054, "step": 11500 } ], "max_steps": 12360, "num_train_epochs": 4, "total_flos": 1.4666146788722688e+17, "trial_name": null, "trial_params": null }