{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9968387776606953, "eval_steps": 500, "global_step": 1422, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9989462592202318, "eval_accuracy": 0.7725118483412322, "eval_loss": 0.5226067304611206, "eval_runtime": 13.0982, "eval_samples_per_second": 32.218, "eval_steps_per_second": 8.093, "step": 474 }, { "epoch": 1.053740779768177, "grad_norm": 7.272853851318359, "learning_rate": 0.0006522557186771741, "loss": 0.5991, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.8317535545023697, "eval_loss": 0.4196150302886963, "eval_runtime": 13.0934, "eval_samples_per_second": 32.23, "eval_steps_per_second": 8.096, "step": 949 }, { "epoch": 2.107481559536354, "grad_norm": 15.65460205078125, "learning_rate": 0.0001818105588338676, "loss": 0.4192, "step": 1000 }, { "epoch": 2.9968387776606953, "eval_accuracy": 0.8388625592417062, "eval_loss": 0.3961751163005829, "eval_runtime": 13.1095, "eval_samples_per_second": 32.19, "eval_steps_per_second": 8.086, "step": 1422 } ], "logging_steps": 500, "max_steps": 1422, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 6754275451469824.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }