{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 3180, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9968553459119497, "grad_norm": 0.9062672853469849, "learning_rate": 1.8006289308176103e-05, "loss": 0.8042, "step": 317 }, { "epoch": 1.0, "eval_accuracy": 0.6709677419354839, "eval_loss": 0.4064599573612213, "eval_runtime": 1.4275, "eval_samples_per_second": 2171.572, "eval_steps_per_second": 45.533, "step": 318 }, { "epoch": 1.9937106918238994, "grad_norm": 0.7205497026443481, "learning_rate": 1.6012578616352204e-05, "loss": 0.3038, "step": 634 }, { "epoch": 2.0, "eval_accuracy": 0.847741935483871, "eval_loss": 0.1362968385219574, "eval_runtime": 1.6105, "eval_samples_per_second": 1924.868, "eval_steps_per_second": 40.36, "step": 636 }, { "epoch": 2.990566037735849, "grad_norm": 0.5987477898597717, "learning_rate": 1.4018867924528304e-05, "loss": 0.1395, "step": 951 }, { "epoch": 3.0, "eval_accuracy": 0.8990322580645161, "eval_loss": 0.07024983316659927, "eval_runtime": 1.6105, "eval_samples_per_second": 1924.879, "eval_steps_per_second": 40.36, "step": 954 }, { "epoch": 3.9874213836477987, "grad_norm": 0.5540674924850464, "learning_rate": 1.2025157232704403e-05, "loss": 0.0891, "step": 1268 }, { "epoch": 4.0, "eval_accuracy": 0.9187096774193548, "eval_loss": 0.04933710768818855, "eval_runtime": 1.7991, "eval_samples_per_second": 1723.062, "eval_steps_per_second": 36.129, "step": 1272 }, { "epoch": 4.984276729559748, "grad_norm": 0.42864474654197693, "learning_rate": 1.0031446540880504e-05, "loss": 0.0692, "step": 1585 }, { "epoch": 5.0, "eval_accuracy": 0.9241935483870968, "eval_loss": 0.04158218950033188, "eval_runtime": 1.6087, "eval_samples_per_second": 1927.079, "eval_steps_per_second": 40.406, "step": 1590 }, { "epoch": 5.981132075471698, "grad_norm": 0.4545074999332428, "learning_rate": 8.037735849056606e-06, "loss": 0.0595, "step": 1902 }, { "epoch": 6.0, "eval_accuracy": 0.9270967741935484, "eval_loss": 0.03682653605937958, "eval_runtime": 1.4287, "eval_samples_per_second": 2169.74, "eval_steps_per_second": 45.495, "step": 1908 }, { "epoch": 6.977987421383648, "grad_norm": 0.34796932339668274, "learning_rate": 6.044025157232704e-06, "loss": 0.0538, "step": 2219 }, { "epoch": 7.0, "eval_accuracy": 0.9316129032258065, "eval_loss": 0.03404370695352554, "eval_runtime": 1.6077, "eval_samples_per_second": 1928.227, "eval_steps_per_second": 40.431, "step": 2226 }, { "epoch": 7.9748427672955975, "grad_norm": 0.3150351941585541, "learning_rate": 4.0503144654088055e-06, "loss": 0.0503, "step": 2536 }, { "epoch": 8.0, "eval_accuracy": 0.9341935483870968, "eval_loss": 0.03234480321407318, "eval_runtime": 1.4217, "eval_samples_per_second": 2180.525, "eval_steps_per_second": 45.721, "step": 2544 }, { "epoch": 8.971698113207546, "grad_norm": 0.3166097104549408, "learning_rate": 2.056603773584906e-06, "loss": 0.0479, "step": 2853 }, { "epoch": 9.0, "eval_accuracy": 0.9341935483870968, "eval_loss": 0.03134315088391304, "eval_runtime": 1.6077, "eval_samples_per_second": 1928.261, "eval_steps_per_second": 40.431, "step": 2862 }, { "epoch": 9.968553459119496, "grad_norm": 0.27577438950538635, "learning_rate": 6.289308176100629e-08, "loss": 0.0467, "step": 3170 } ], "logging_steps": 317, "max_steps": 3180, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000000000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 827333546055996.0, "train_batch_size": 48, "trial_name": null, "trial_params": { "alpha": 0.5781383032678951, "num_train_epochs": 10, "temperature": 2 } }