{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.161290322580645, "eval_steps": 1, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5161290322580645, "grad_norm": 133.33294677734375, "learning_rate": 2.5e-05, "loss": 1.6332, "step": 1 }, { "epoch": 0.5161290322580645, "eval_accuracy": 0.5161290322580645, "eval_f1": 0.3712905452035887, "eval_loss": 2.060542583465576, "eval_runtime": 9.1597, "eval_samples_per_second": 6.769, "eval_steps_per_second": 0.873, "step": 1 }, { "epoch": 1.032258064516129, "grad_norm": 125.3294677734375, "learning_rate": 5e-05, "loss": 1.6002, "step": 2 }, { "epoch": 1.032258064516129, "eval_accuracy": 0.4838709677419355, "eval_f1": 0.269908386187456, "eval_loss": 4.0351104736328125, "eval_runtime": 9.1436, "eval_samples_per_second": 6.781, "eval_steps_per_second": 0.875, "step": 2 }, { "epoch": 1.5483870967741935, "grad_norm": 393.05718994140625, "learning_rate": 4.375e-05, "loss": 4.4364, "step": 3 }, { "epoch": 1.5483870967741935, "eval_accuracy": 0.7741935483870968, "eval_f1": 0.5378874856486796, "eval_loss": 2.373875379562378, "eval_runtime": 9.1471, "eval_samples_per_second": 6.778, "eval_steps_per_second": 0.875, "step": 3 }, { "epoch": 2.064516129032258, "grad_norm": 109.7702407836914, "learning_rate": 3.7500000000000003e-05, "loss": 1.4704, "step": 4 }, { "epoch": 2.064516129032258, "eval_accuracy": 0.7580645161290323, "eval_f1": 0.5238376674546887, "eval_loss": 2.210745096206665, "eval_runtime": 9.1416, "eval_samples_per_second": 6.782, "eval_steps_per_second": 0.875, "step": 4 }, { "epoch": 2.5806451612903225, "grad_norm": 111.4744644165039, "learning_rate": 3.125e-05, "loss": 1.9734, "step": 5 }, { "epoch": 2.5806451612903225, "eval_accuracy": 0.8225806451612904, "eval_f1": 0.5721364589289117, "eval_loss": 1.480078935623169, "eval_runtime": 9.1345, "eval_samples_per_second": 6.787, "eval_steps_per_second": 0.876, "step": 5 }, { "epoch": 3.096774193548387, "grad_norm": 57.42088317871094, "learning_rate": 2.5e-05, "loss": 0.5839, "step": 6 }, { "epoch": 3.096774193548387, "eval_accuracy": 0.8870967741935484, "eval_f1": 0.6163557565479555, "eval_loss": 0.7713032364845276, "eval_runtime": 9.1377, "eval_samples_per_second": 6.785, "eval_steps_per_second": 0.875, "step": 6 }, { "epoch": 3.6129032258064515, "grad_norm": 44.42446517944336, "learning_rate": 1.8750000000000002e-05, "loss": 0.3722, "step": 7 }, { "epoch": 3.6129032258064515, "eval_accuracy": 0.8548387096774194, "eval_f1": 0.7293271700051361, "eval_loss": 0.4509028494358063, "eval_runtime": 9.1356, "eval_samples_per_second": 6.787, "eval_steps_per_second": 0.876, "step": 7 }, { "epoch": 4.129032258064516, "grad_norm": 18.921249389648438, "learning_rate": 1.25e-05, "loss": 0.18, "step": 8 }, { "epoch": 4.129032258064516, "eval_accuracy": 0.8548387096774194, "eval_f1": 0.7942615201216756, "eval_loss": 0.661185622215271, "eval_runtime": 9.1322, "eval_samples_per_second": 6.789, "eval_steps_per_second": 0.876, "step": 8 }, { "epoch": 4.645161290322581, "grad_norm": 40.99948501586914, "learning_rate": 6.25e-06, "loss": 0.1994, "step": 9 }, { "epoch": 4.645161290322581, "eval_accuracy": 0.8870967741935484, "eval_f1": 0.8194187528260781, "eval_loss": 0.5956389307975769, "eval_runtime": 9.1295, "eval_samples_per_second": 6.791, "eval_steps_per_second": 0.876, "step": 9 }, { "epoch": 5.161290322580645, "grad_norm": 45.76235580444336, "learning_rate": 0.0, "loss": 0.2887, "step": 10 }, { "epoch": 5.161290322580645, "eval_accuracy": 0.8548387096774194, "eval_f1": 0.7771043771043771, "eval_loss": 0.5316924452781677, "eval_runtime": 9.1285, "eval_samples_per_second": 6.792, "eval_steps_per_second": 0.876, "step": 10 }, { "epoch": 5.161290322580645, "step": 10, "total_flos": 9605361558159360.0, "train_loss": 1.2737861171364784, "train_runtime": 685.5263, "train_samples_per_second": 3.559, "train_steps_per_second": 0.015 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 9605361558159360.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }