{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.4341020584106445, "learning_rate": 4.75e-05, "loss": 0.3817, "step": 106 }, { "epoch": 1.0, "eval_accuracy": 0.9515033947623667, "eval_f1": 0.793193717277487, "eval_loss": 0.14420253038406372, "eval_precision": 0.7266187050359713, "eval_recall": 0.8731988472622478, "eval_runtime": 0.9468, "eval_samples_per_second": 197.515, "eval_steps_per_second": 3.169, "step": 106 }, { "epoch": 2.0, "grad_norm": 2.4988839626312256, "learning_rate": 4.5e-05, "loss": 0.1266, "step": 212 }, { "epoch": 2.0, "eval_accuracy": 0.9551406401551892, "eval_f1": 0.8083441981747066, "eval_loss": 0.13850077986717224, "eval_precision": 0.7380952380952381, "eval_recall": 0.8933717579250721, "eval_runtime": 0.9888, "eval_samples_per_second": 189.116, "eval_steps_per_second": 3.034, "step": 212 }, { "epoch": 3.0, "grad_norm": 2.6240944862365723, "learning_rate": 4.25e-05, "loss": 0.087, "step": 318 }, { "epoch": 3.0, "eval_accuracy": 0.9568380213385063, "eval_f1": 0.8100929614873836, "eval_loss": 0.13670620322227478, "eval_precision": 0.7512315270935961, "eval_recall": 0.8789625360230547, "eval_runtime": 0.9262, "eval_samples_per_second": 201.901, "eval_steps_per_second": 3.239, "step": 318 }, { "epoch": 4.0, "grad_norm": 0.2754727900028229, "learning_rate": 4e-05, "loss": 0.0528, "step": 424 }, { "epoch": 4.0, "eval_accuracy": 0.9595053346265762, "eval_f1": 0.8163265306122449, "eval_loss": 0.14675553143024445, "eval_precision": 0.7731958762886598, "eval_recall": 0.8645533141210374, "eval_runtime": 0.9413, "eval_samples_per_second": 198.665, "eval_steps_per_second": 3.187, "step": 424 }, { "epoch": 5.0, "grad_norm": 3.4952049255371094, "learning_rate": 3.7500000000000003e-05, "loss": 0.0424, "step": 530 }, { "epoch": 5.0, "eval_accuracy": 0.960717749757517, "eval_f1": 0.8215767634854771, "eval_loss": 0.1664169430732727, "eval_precision": 0.7898936170212766, "eval_recall": 0.8559077809798271, "eval_runtime": 1.041, "eval_samples_per_second": 179.63, "eval_steps_per_second": 2.882, "step": 530 }, { "epoch": 6.0, "grad_norm": 1.5230388641357422, "learning_rate": 3.5e-05, "loss": 0.0275, "step": 636 }, { "epoch": 6.0, "eval_accuracy": 0.9582929194956353, "eval_f1": 0.8241610738255033, "eval_loss": 0.20444069802761078, "eval_precision": 0.7713567839195979, "eval_recall": 0.8847262247838616, "eval_runtime": 0.9522, "eval_samples_per_second": 196.387, "eval_steps_per_second": 3.151, "step": 636 }, { "epoch": 7.0, "grad_norm": 1.4965060949325562, "learning_rate": 3.2500000000000004e-05, "loss": 0.019, "step": 742 }, { "epoch": 7.0, "eval_accuracy": 0.9553831231813773, "eval_f1": 0.8089005235602094, "eval_loss": 0.23766779899597168, "eval_precision": 0.7410071942446043, "eval_recall": 0.8904899135446686, "eval_runtime": 0.9461, "eval_samples_per_second": 197.656, "eval_steps_per_second": 3.171, "step": 742 }, { "epoch": 8.0, "grad_norm": 2.0428667068481445, "learning_rate": 3e-05, "loss": 0.0145, "step": 848 }, { "epoch": 8.0, "eval_accuracy": 0.9587778855480117, "eval_f1": 0.8279569892473119, "eval_loss": 0.24319829046726227, "eval_precision": 0.7758186397984886, "eval_recall": 0.8876080691642652, "eval_runtime": 0.9448, "eval_samples_per_second": 197.924, "eval_steps_per_second": 3.175, "step": 848 }, { "epoch": 9.0, "grad_norm": 0.5034522414207458, "learning_rate": 2.7500000000000004e-05, "loss": 0.0102, "step": 954 }, { "epoch": 9.0, "eval_accuracy": 0.9641125121241513, "eval_f1": 0.854024556616644, "eval_loss": 0.22865384817123413, "eval_precision": 0.810880829015544, "eval_recall": 0.9020172910662824, "eval_runtime": 0.9532, "eval_samples_per_second": 196.191, "eval_steps_per_second": 3.147, "step": 954 }, { "epoch": 10.0, "grad_norm": 0.0857200101017952, "learning_rate": 2.5e-05, "loss": 0.0067, "step": 1060 }, { "epoch": 10.0, "eval_accuracy": 0.9616876818622696, "eval_f1": 0.8442622950819673, "eval_loss": 0.24303482472896576, "eval_precision": 0.8025974025974026, "eval_recall": 0.8904899135446686, "eval_runtime": 0.9473, "eval_samples_per_second": 197.406, "eval_steps_per_second": 3.167, "step": 1060 }, { "epoch": 11.0, "grad_norm": 0.07605205476284027, "learning_rate": 2.25e-05, "loss": 0.0064, "step": 1166 }, { "epoch": 11.0, "eval_accuracy": 0.9602327837051406, "eval_f1": 0.8396739130434782, "eval_loss": 0.2674606740474701, "eval_precision": 0.794344473007712, "eval_recall": 0.8904899135446686, "eval_runtime": 0.9552, "eval_samples_per_second": 195.768, "eval_steps_per_second": 3.141, "step": 1166 }, { "epoch": 12.0, "grad_norm": 1.8096215724945068, "learning_rate": 2e-05, "loss": 0.0046, "step": 1272 }, { "epoch": 12.0, "eval_accuracy": 0.9619301648884578, "eval_f1": 0.8344549125168236, "eval_loss": 0.2742844820022583, "eval_precision": 0.7828282828282829, "eval_recall": 0.8933717579250721, "eval_runtime": 1.0274, "eval_samples_per_second": 182.02, "eval_steps_per_second": 2.92, "step": 1272 }, { "epoch": 13.0, "grad_norm": 0.9064086675643921, "learning_rate": 1.75e-05, "loss": 0.0034, "step": 1378 }, { "epoch": 13.0, "eval_accuracy": 0.9619301648884578, "eval_f1": 0.8451086956521738, "eval_loss": 0.2666186988353729, "eval_precision": 0.7994858611825193, "eval_recall": 0.8962536023054755, "eval_runtime": 0.9543, "eval_samples_per_second": 195.964, "eval_steps_per_second": 3.144, "step": 1378 }, { "epoch": 14.0, "grad_norm": 0.7048726081848145, "learning_rate": 1.5e-05, "loss": 0.0036, "step": 1484 }, { "epoch": 14.0, "eval_accuracy": 0.9633850630455868, "eval_f1": 0.8453038674033149, "eval_loss": 0.2606286108493805, "eval_precision": 0.8116710875331565, "eval_recall": 0.8818443804034583, "eval_runtime": 0.9565, "eval_samples_per_second": 195.509, "eval_steps_per_second": 3.137, "step": 1484 }, { "epoch": 15.0, "grad_norm": 0.10259877145290375, "learning_rate": 1.25e-05, "loss": 0.0027, "step": 1590 }, { "epoch": 15.0, "eval_accuracy": 0.9626576139670223, "eval_f1": 0.8405405405405406, "eval_loss": 0.2861556112766266, "eval_precision": 0.7913486005089059, "eval_recall": 0.8962536023054755, "eval_runtime": 0.9627, "eval_samples_per_second": 194.249, "eval_steps_per_second": 3.116, "step": 1590 }, { "epoch": 16.0, "grad_norm": 0.023264136165380478, "learning_rate": 1e-05, "loss": 0.0016, "step": 1696 }, { "epoch": 16.0, "eval_accuracy": 0.9629000969932104, "eval_f1": 0.8426812585499316, "eval_loss": 0.2792609930038452, "eval_precision": 0.8020833333333334, "eval_recall": 0.8876080691642652, "eval_runtime": 0.9674, "eval_samples_per_second": 193.295, "eval_steps_per_second": 3.101, "step": 1696 }, { "epoch": 17.0, "grad_norm": 2.410804271697998, "learning_rate": 7.5e-06, "loss": 0.0012, "step": 1802 }, { "epoch": 17.0, "eval_accuracy": 0.962172647914646, "eval_f1": 0.841248303934871, "eval_loss": 0.29505419731140137, "eval_precision": 0.7948717948717948, "eval_recall": 0.8933717579250721, "eval_runtime": 0.9612, "eval_samples_per_second": 194.551, "eval_steps_per_second": 3.121, "step": 1802 }, { "epoch": 18.0, "grad_norm": 0.004150555469095707, "learning_rate": 5e-06, "loss": 0.0012, "step": 1908 }, { "epoch": 18.0, "eval_accuracy": 0.9616876818622696, "eval_f1": 0.8380952380952381, "eval_loss": 0.29299911856651306, "eval_precision": 0.7938144329896907, "eval_recall": 0.8876080691642652, "eval_runtime": 0.9455, "eval_samples_per_second": 197.787, "eval_steps_per_second": 3.173, "step": 1908 }, { "epoch": 19.0, "grad_norm": 0.0056025926023721695, "learning_rate": 2.5e-06, "loss": 0.0014, "step": 2014 }, { "epoch": 19.0, "eval_accuracy": 0.9612027158098934, "eval_f1": 0.8353741496598639, "eval_loss": 0.29529693722724915, "eval_precision": 0.7912371134020618, "eval_recall": 0.8847262247838616, "eval_runtime": 0.9556, "eval_samples_per_second": 195.689, "eval_steps_per_second": 3.139, "step": 2014 }, { "epoch": 20.0, "grad_norm": 0.012953029945492744, "learning_rate": 0.0, "loss": 0.0007, "step": 2120 }, { "epoch": 20.0, "eval_accuracy": 0.9619301648884578, "eval_f1": 0.8387978142076504, "eval_loss": 0.29178306460380554, "eval_precision": 0.7974025974025974, "eval_recall": 0.8847262247838616, "eval_runtime": 0.9529, "eval_samples_per_second": 196.248, "eval_steps_per_second": 3.148, "step": 2120 }, { "epoch": 20.0, "step": 2120, "total_flos": 918687991011936.0, "train_loss": 0.03976648653734405, "train_runtime": 515.0309, "train_samples_per_second": 65.549, "train_steps_per_second": 4.116 } ], "logging_steps": 500, "max_steps": 2120, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 918687991011936.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }