{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9263157894736842, "eval_steps": 3, "global_step": 11, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 0.3172740936279297, "learning_rate": 1.0000000000000002e-06, "loss": 0.9286, "step": 1 }, { "epoch": 0.08, "eval_loss": 1.0365231037139893, "eval_runtime": 24.9628, "eval_samples_per_second": 0.801, "eval_steps_per_second": 0.801, "step": 1 }, { "epoch": 0.17, "grad_norm": 0.3426271378993988, "learning_rate": 2.0000000000000003e-06, "loss": 0.951, "step": 2 }, { "epoch": 0.25, "grad_norm": 0.28614237904548645, "learning_rate": 3e-06, "loss": 0.9238, "step": 3 }, { "epoch": 0.25, "eval_loss": 1.0386725664138794, "eval_runtime": 25.232, "eval_samples_per_second": 0.793, "eval_steps_per_second": 0.793, "step": 3 }, { "epoch": 0.34, "grad_norm": 0.3368183672428131, "learning_rate": 4.000000000000001e-06, "loss": 0.9443, "step": 4 }, { "epoch": 0.42, "grad_norm": 0.37111783027648926, "learning_rate": 5e-06, "loss": 0.9553, "step": 5 }, { "epoch": 0.51, "grad_norm": 0.3505602478981018, "learning_rate": 6e-06, "loss": 0.9448, "step": 6 }, { "epoch": 0.51, "eval_loss": 1.0401721000671387, "eval_runtime": 25.2324, "eval_samples_per_second": 0.793, "eval_steps_per_second": 0.793, "step": 6 }, { "epoch": 0.59, "grad_norm": 0.27382639050483704, "learning_rate": 7e-06, "loss": 0.9339, "step": 7 }, { "epoch": 0.67, "grad_norm": 0.3769858479499817, "learning_rate": 8.000000000000001e-06, "loss": 0.9662, "step": 8 }, { "epoch": 0.76, "grad_norm": 0.3182668387889862, "learning_rate": 9e-06, "loss": 0.9182, "step": 9 }, { "epoch": 0.76, "eval_loss": 1.0365254878997803, "eval_runtime": 25.3228, "eval_samples_per_second": 0.79, "eval_steps_per_second": 0.79, "step": 9 }, { "epoch": 0.84, "grad_norm": 0.32636308670043945, "learning_rate": 1e-05, "loss": 0.959, "step": 10 }, { "epoch": 0.93, "grad_norm": 0.2772234082221985, "learning_rate": 0.0, "loss": 0.9204, "step": 11 } ], "logging_steps": 1, "max_steps": 11, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1.1152453558861824e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }