{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.014311270125223614, "eval_steps": 500, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0035778175313059034, "grad_norm": 0.5825825929641724, "learning_rate": 0.00019928443649373882, "loss": 1.248, "num_input_tokens_seen": 6646, "step": 10 }, { "epoch": 0.007155635062611807, "grad_norm": 0.5380188822746277, "learning_rate": 0.00019856887298747765, "loss": 0.5478, "num_input_tokens_seen": 13063, "step": 20 }, { "epoch": 0.01073345259391771, "grad_norm": 0.3872911036014557, "learning_rate": 0.00019785330948121648, "loss": 0.5135, "num_input_tokens_seen": 19512, "step": 30 }, { "epoch": 0.014311270125223614, "grad_norm": 0.4991438686847687, "learning_rate": 0.0001971377459749553, "loss": 0.5092, "num_input_tokens_seen": 26884, "step": 40 } ], "logging_steps": 10, "max_steps": 2795, "num_input_tokens_seen": 26884, "num_train_epochs": 1, "save_steps": 20, "total_flos": 604526222057472.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }