{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "eval_steps": 500, "global_step": 2862, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9968553459119497, "grad_norm": 0.7500613927841187, "learning_rate": 1.7784765897973445e-05, "loss": 0.574, "step": 317 }, { "epoch": 1.0, "eval_accuracy": 0.645483870967742, "eval_loss": 0.2891772985458374, "eval_runtime": 1.5814, "eval_samples_per_second": 1960.332, "eval_steps_per_second": 41.104, "step": 318 }, { "epoch": 1.9937106918238994, "grad_norm": 0.5959137678146362, "learning_rate": 1.556953179594689e-05, "loss": 0.2286, "step": 634 }, { "epoch": 2.0, "eval_accuracy": 0.8493548387096774, "eval_loss": 0.11736558377742767, "eval_runtime": 1.4156, "eval_samples_per_second": 2189.928, "eval_steps_per_second": 45.918, "step": 636 }, { "epoch": 2.990566037735849, "grad_norm": 0.5286552309989929, "learning_rate": 1.3354297693920338e-05, "loss": 0.1243, "step": 951 }, { "epoch": 3.0, "eval_accuracy": 0.8941935483870967, "eval_loss": 0.06792479753494263, "eval_runtime": 1.6008, "eval_samples_per_second": 1936.498, "eval_steps_per_second": 40.604, "step": 954 }, { "epoch": 3.9874213836477987, "grad_norm": 0.4791622459888458, "learning_rate": 1.1139063591893781e-05, "loss": 0.086, "step": 1268 }, { "epoch": 4.0, "eval_accuracy": 0.9119354838709678, "eval_loss": 0.04815125837922096, "eval_runtime": 1.4165, "eval_samples_per_second": 2188.476, "eval_steps_per_second": 45.887, "step": 1272 }, { "epoch": 4.984276729559748, "grad_norm": 0.36969393491744995, "learning_rate": 8.923829489867226e-06, "loss": 0.0682, "step": 1585 }, { "epoch": 5.0, "eval_accuracy": 0.9212903225806451, "eval_loss": 0.039599157869815826, "eval_runtime": 1.6105, "eval_samples_per_second": 1924.896, "eval_steps_per_second": 40.361, "step": 1590 }, { "epoch": 5.981132075471698, "grad_norm": 0.41220352053642273, "learning_rate": 6.708595387840672e-06, "loss": 0.0589, "step": 1902 }, { "epoch": 6.0, "eval_accuracy": 0.9248387096774193, "eval_loss": 0.034682855010032654, "eval_runtime": 1.4265, "eval_samples_per_second": 2173.216, "eval_steps_per_second": 45.567, "step": 1908 }, { "epoch": 6.977987421383648, "grad_norm": 0.2867417633533478, "learning_rate": 4.4933612858141165e-06, "loss": 0.0535, "step": 2219 }, { "epoch": 7.0, "eval_accuracy": 0.9303225806451613, "eval_loss": 0.032181933522224426, "eval_runtime": 1.6126, "eval_samples_per_second": 1922.335, "eval_steps_per_second": 40.307, "step": 2226 }, { "epoch": 7.9748427672955975, "grad_norm": 0.30042725801467896, "learning_rate": 2.2781271837875614e-06, "loss": 0.0505, "step": 2536 }, { "epoch": 8.0, "eval_accuracy": 0.9306451612903226, "eval_loss": 0.030586862936615944, "eval_runtime": 1.4231, "eval_samples_per_second": 2178.305, "eval_steps_per_second": 45.674, "step": 2544 }, { "epoch": 8.971698113207546, "grad_norm": 0.3094331622123718, "learning_rate": 6.289308176100629e-08, "loss": 0.0486, "step": 2853 } ], "logging_steps": 317, "max_steps": 2862, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 1000000000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 742988563029384.0, "train_batch_size": 48, "trial_name": null, "trial_params": { "alpha": 0.8610448302361252, "num_train_epochs": 9, "temperature": 3 } }