{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2, "eval_steps": 50, "global_step": 750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 0.05632242560386658, "learning_rate": 0.00013297872340425532, "loss": 1.8852, "step": 50 }, { "epoch": 0.08, "eval_loss": 1.707169532775879, "eval_runtime": 27.6118, "eval_samples_per_second": 3.622, "eval_steps_per_second": 0.471, "step": 50 }, { "epoch": 0.16, "grad_norm": 0.02606302499771118, "learning_rate": 0.0002632978723404255, "loss": 1.6036, "step": 100 }, { "epoch": 0.16, "eval_loss": 1.4855879545211792, "eval_runtime": 27.6396, "eval_samples_per_second": 3.618, "eval_steps_per_second": 0.47, "step": 100 }, { "epoch": 0.24, "grad_norm": 0.034023039042949677, "learning_rate": 0.00039627659574468084, "loss": 1.5186, "step": 150 }, { "epoch": 0.24, "eval_loss": 1.466215968132019, "eval_runtime": 27.5933, "eval_samples_per_second": 3.624, "eval_steps_per_second": 0.471, "step": 150 }, { "epoch": 0.32, "grad_norm": 0.04070857912302017, "learning_rate": 0.0004967397747480735, "loss": 1.4822, "step": 200 }, { "epoch": 0.32, "eval_loss": 1.4602761268615723, "eval_runtime": 27.615, "eval_samples_per_second": 3.621, "eval_steps_per_second": 0.471, "step": 200 }, { "epoch": 0.4, "grad_norm": 0.03537657856941223, "learning_rate": 0.00048192056905749855, "loss": 1.5035, "step": 250 }, { "epoch": 0.4, "eval_loss": 1.4578460454940796, "eval_runtime": 27.6799, "eval_samples_per_second": 3.613, "eval_steps_per_second": 0.47, "step": 250 }, { "epoch": 0.48, "grad_norm": 0.040425803512334824, "learning_rate": 0.00046710136336692356, "loss": 1.4813, "step": 300 }, { "epoch": 0.48, "eval_loss": 1.4557801485061646, "eval_runtime": 27.6578, "eval_samples_per_second": 3.616, "eval_steps_per_second": 0.47, "step": 300 }, { "epoch": 0.56, "grad_norm": 0.03970955312252045, "learning_rate": 0.00045228215767634857, "loss": 1.4878, "step": 350 }, { "epoch": 0.56, "eval_loss": 1.4533815383911133, "eval_runtime": 27.6598, "eval_samples_per_second": 3.615, "eval_steps_per_second": 0.47, "step": 350 }, { "epoch": 0.64, "grad_norm": 0.03238854929804802, "learning_rate": 0.0004374629519857736, "loss": 1.4765, "step": 400 }, { "epoch": 0.64, "eval_loss": 1.4522851705551147, "eval_runtime": 27.5987, "eval_samples_per_second": 3.623, "eval_steps_per_second": 0.471, "step": 400 }, { "epoch": 0.72, "grad_norm": 0.04000236839056015, "learning_rate": 0.0004226437462951986, "loss": 1.4803, "step": 450 }, { "epoch": 0.72, "eval_loss": 1.4484608173370361, "eval_runtime": 27.6359, "eval_samples_per_second": 3.618, "eval_steps_per_second": 0.47, "step": 450 }, { "epoch": 0.8, "grad_norm": 0.035757772624492645, "learning_rate": 0.0004078245406046236, "loss": 1.4925, "step": 500 }, { "epoch": 0.8, "eval_loss": 1.4477622509002686, "eval_runtime": 27.6496, "eval_samples_per_second": 3.617, "eval_steps_per_second": 0.47, "step": 500 }, { "epoch": 0.88, "grad_norm": 0.039886392652988434, "learning_rate": 0.0003930053349140486, "loss": 1.49, "step": 550 }, { "epoch": 0.88, "eval_loss": 1.4466557502746582, "eval_runtime": 27.6503, "eval_samples_per_second": 3.617, "eval_steps_per_second": 0.47, "step": 550 }, { "epoch": 0.96, "grad_norm": 0.035769980400800705, "learning_rate": 0.00037818612922347364, "loss": 1.4888, "step": 600 }, { "epoch": 0.96, "eval_loss": 1.4460580348968506, "eval_runtime": 27.6213, "eval_samples_per_second": 3.62, "eval_steps_per_second": 0.471, "step": 600 }, { "epoch": 1.04, "grad_norm": 0.039503153413534164, "learning_rate": 0.00036336692353289865, "loss": 1.4732, "step": 650 }, { "epoch": 1.04, "eval_loss": 1.4470006227493286, "eval_runtime": 27.6084, "eval_samples_per_second": 3.622, "eval_steps_per_second": 0.471, "step": 650 }, { "epoch": 1.12, "grad_norm": 0.04102110490202904, "learning_rate": 0.00034854771784232366, "loss": 1.4677, "step": 700 }, { "epoch": 1.12, "eval_loss": 1.4475681781768799, "eval_runtime": 27.6032, "eval_samples_per_second": 3.623, "eval_steps_per_second": 0.471, "step": 700 } ], "logging_steps": 50, "max_steps": 1875, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.641442504933376e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }