{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9935483870967742, "eval_steps": 500, "global_step": 77, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "grad_norm": 0.6195777654647827, "learning_rate": 0.00019549667548552556, "loss": 3.2597, "step": 11 }, { "epoch": 0.28, "grad_norm": 0.6481589674949646, "learning_rate": 0.00017146733860429612, "loss": 2.9913, "step": 22 }, { "epoch": 0.43, "grad_norm": 0.88404381275177, "learning_rate": 0.00013171912885891063, "loss": 2.9169, "step": 33 }, { "epoch": 0.57, "grad_norm": 0.6059058904647827, "learning_rate": 8.499446016553474e-05, "loss": 2.8932, "step": 44 }, { "epoch": 0.71, "grad_norm": 0.6640863418579102, "learning_rate": 4.1570182637163155e-05, "loss": 2.9104, "step": 55 }, { "epoch": 0.85, "grad_norm": 0.660681426525116, "learning_rate": 1.0997242356532334e-05, "loss": 2.8961, "step": 66 }, { "epoch": 0.99, "grad_norm": 0.8390025496482849, "learning_rate": 0.0, "loss": 2.9082, "step": 77 } ], "logging_steps": 11, "max_steps": 77, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 8847141698304000.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }