{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0033983744442241796, "eval_steps": 8, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011327914814080598, "eval_loss": 10.376569747924805, "eval_runtime": 21.9746, "eval_samples_per_second": 169.15, "eval_steps_per_second": 84.598, "step": 1 }, { "epoch": 0.0003398374444224179, "grad_norm": 0.042266711592674255, "learning_rate": 0.00012, "loss": 10.3737, "step": 3 }, { "epoch": 0.0006796748888448359, "grad_norm": 0.057990022003650665, "learning_rate": 0.0001992114701314478, "loss": 10.3771, "step": 6 }, { "epoch": 0.0009062331851264478, "eval_loss": 10.375176429748535, "eval_runtime": 22.0391, "eval_samples_per_second": 168.655, "eval_steps_per_second": 84.35, "step": 8 }, { "epoch": 0.0010195123332672538, "grad_norm": 0.048735857009887695, "learning_rate": 0.00018763066800438636, "loss": 10.3756, "step": 9 }, { "epoch": 0.0013593497776896717, "grad_norm": 0.04772263020277023, "learning_rate": 0.000163742398974869, "loss": 10.3747, "step": 12 }, { "epoch": 0.0016991872221120898, "grad_norm": 0.05866085737943649, "learning_rate": 0.00013090169943749476, "loss": 10.3747, "step": 15 }, { "epoch": 0.0018124663702528956, "eval_loss": 10.373068809509277, "eval_runtime": 22.1328, "eval_samples_per_second": 167.941, "eval_steps_per_second": 83.993, "step": 16 }, { "epoch": 0.0020390246665345077, "grad_norm": 0.05547315254807472, "learning_rate": 9.372094804706867e-05, "loss": 10.3714, "step": 18 }, { "epoch": 0.0023788621109569258, "grad_norm": 0.07062539458274841, "learning_rate": 5.7422070843492734e-05, "loss": 10.3731, "step": 21 }, { "epoch": 0.0027186995553793434, "grad_norm": 0.07306697219610214, "learning_rate": 2.7103137257858868e-05, "loss": 10.3711, "step": 24 }, { "epoch": 0.0027186995553793434, "eval_loss": 10.371893882751465, "eval_runtime": 22.0107, "eval_samples_per_second": 168.872, "eval_steps_per_second": 84.459, "step": 24 }, { "epoch": 0.0030585369998017615, "grad_norm": 0.06676942855119705, "learning_rate": 7.022351411174866e-06, "loss": 10.3717, "step": 27 }, { "epoch": 0.0033983744442241796, "grad_norm": 0.07616107165813446, "learning_rate": 0.0, "loss": 10.3713, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 849914757120.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }