{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.037993920972644375, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007598784194528875, "eval_loss": 8.610437393188477, "eval_runtime": 7.5213, "eval_samples_per_second": 73.791, "eval_steps_per_second": 36.962, "step": 1 }, { "epoch": 0.003799392097264438, "grad_norm": 3.1603012084960938, "learning_rate": 5e-05, "loss": 8.5891, "step": 5 }, { "epoch": 0.007598784194528876, "grad_norm": 2.109530448913574, "learning_rate": 0.0001, "loss": 8.2812, "step": 10 }, { "epoch": 0.007598784194528876, "eval_loss": 8.542115211486816, "eval_runtime": 7.519, "eval_samples_per_second": 73.813, "eval_steps_per_second": 36.973, "step": 10 }, { "epoch": 0.011398176291793313, "grad_norm": 2.883855104446411, "learning_rate": 9.619397662556435e-05, "loss": 8.3708, "step": 15 }, { "epoch": 0.015197568389057751, "grad_norm": 2.5980207920074463, "learning_rate": 8.535533905932738e-05, "loss": 8.1666, "step": 20 }, { "epoch": 0.015197568389057751, "eval_loss": 8.359399795532227, "eval_runtime": 7.655, "eval_samples_per_second": 72.502, "eval_steps_per_second": 36.316, "step": 20 }, { "epoch": 0.018996960486322188, "grad_norm": 3.1167116165161133, "learning_rate": 6.91341716182545e-05, "loss": 8.095, "step": 25 }, { "epoch": 0.022796352583586626, "grad_norm": 2.891735792160034, "learning_rate": 5e-05, "loss": 8.1248, "step": 30 }, { "epoch": 0.022796352583586626, "eval_loss": 8.240621566772461, "eval_runtime": 7.7515, "eval_samples_per_second": 71.599, "eval_steps_per_second": 35.864, "step": 30 }, { "epoch": 0.026595744680851064, "grad_norm": 2.1788597106933594, "learning_rate": 3.086582838174551e-05, "loss": 7.9935, "step": 35 }, { "epoch": 0.030395136778115502, "grad_norm": 2.944188356399536, "learning_rate": 1.4644660940672627e-05, "loss": 8.1376, "step": 40 }, { "epoch": 0.030395136778115502, "eval_loss": 8.18637752532959, "eval_runtime": 7.9085, "eval_samples_per_second": 70.177, "eval_steps_per_second": 35.152, "step": 40 }, { "epoch": 0.03419452887537994, "grad_norm": 2.2056612968444824, "learning_rate": 3.8060233744356633e-06, "loss": 8.1537, "step": 45 }, { "epoch": 0.037993920972644375, "grad_norm": 2.681415319442749, "learning_rate": 0.0, "loss": 8.181, "step": 50 }, { "epoch": 0.037993920972644375, "eval_loss": 8.177650451660156, "eval_runtime": 7.6687, "eval_samples_per_second": 72.372, "eval_steps_per_second": 36.251, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6178892808192.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }