{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 2.1120104789733887, "learning_rate": 4.9214579028215776e-05, "loss": 0.6061, "num_input_tokens_seen": 32448, "step": 10 }, { "epoch": 0.16, "grad_norm": 3.365793228149414, "learning_rate": 4.690766700109659e-05, "loss": 0.354, "num_input_tokens_seen": 64640, "step": 20 }, { "epoch": 0.24, "grad_norm": 4.048572540283203, "learning_rate": 4.3224215685535294e-05, "loss": 0.3654, "num_input_tokens_seen": 97312, "step": 30 }, { "epoch": 0.32, "grad_norm": 11.761089324951172, "learning_rate": 3.8395669874474915e-05, "loss": 0.316, "num_input_tokens_seen": 128960, "step": 40 }, { "epoch": 0.4, "grad_norm": 8.3576078414917, "learning_rate": 3.272542485937369e-05, "loss": 0.3752, "num_input_tokens_seen": 161904, "step": 50 }, { "epoch": 0.48, "grad_norm": 3.2308974266052246, "learning_rate": 2.656976298823284e-05, "loss": 0.2698, "num_input_tokens_seen": 193568, "step": 60 }, { "epoch": 0.56, "grad_norm": 3.596054792404175, "learning_rate": 2.031546713535688e-05, "loss": 0.2602, "num_input_tokens_seen": 225264, "step": 70 }, { "epoch": 0.64, "grad_norm": 4.938518524169922, "learning_rate": 1.4355517710873184e-05, "loss": 0.2667, "num_input_tokens_seen": 257376, "step": 80 }, { "epoch": 0.72, "grad_norm": 9.911075592041016, "learning_rate": 9.064400256282757e-06, "loss": 0.2667, "num_input_tokens_seen": 289728, "step": 90 }, { "epoch": 0.8, "grad_norm": 4.612339973449707, "learning_rate": 4.7745751406263165e-06, "loss": 0.2298, "num_input_tokens_seen": 322000, "step": 100 }, { "epoch": 0.88, "grad_norm": 4.9222307205200195, "learning_rate": 1.7555878527937164e-06, "loss": 0.2468, "num_input_tokens_seen": 354432, "step": 110 }, { "epoch": 0.96, "grad_norm": 6.1633620262146, "learning_rate": 1.9713246713805588e-07, "loss": 0.2118, "num_input_tokens_seen": 386896, "step": 120 }, { "epoch": 1.0, "num_input_tokens_seen": 403072, "step": 125, "total_flos": 1.7254973841604608e+16, "train_loss": 0.3098531789779663, "train_runtime": 8159.7352, "train_samples_per_second": 0.123, "train_steps_per_second": 0.015 } ], "logging_steps": 10, "max_steps": 125, "num_input_tokens_seen": 403072, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7254973841604608e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }