{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0012221299531516852, "eval_steps": 5, "global_step": 39, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.1336665465427825e-05, "eval_loss": 11.820560455322266, "eval_runtime": 130.7138, "eval_samples_per_second": 102.797, "eval_steps_per_second": 51.402, "step": 1 }, { "epoch": 9.400999639628348e-05, "grad_norm": 9.347860336303711, "learning_rate": 2.857142857142857e-05, "loss": 11.7883, "step": 3 }, { "epoch": 0.00015668332732713913, "eval_loss": 11.753523826599121, "eval_runtime": 130.2236, "eval_samples_per_second": 103.184, "eval_steps_per_second": 51.596, "step": 5 }, { "epoch": 0.00018801999279256695, "grad_norm": 8.440356254577637, "learning_rate": 5.714285714285714e-05, "loss": 11.7292, "step": 6 }, { "epoch": 0.0002820299891888504, "grad_norm": 7.404284954071045, "learning_rate": 8.571428571428571e-05, "loss": 11.5233, "step": 9 }, { "epoch": 0.00031336665465427825, "eval_loss": 11.403406143188477, "eval_runtime": 130.469, "eval_samples_per_second": 102.99, "eval_steps_per_second": 51.499, "step": 10 }, { "epoch": 0.0003760399855851339, "grad_norm": 7.426136493682861, "learning_rate": 0.00011428571428571428, "loss": 11.4367, "step": 12 }, { "epoch": 0.00047004998198141735, "grad_norm": 7.491276741027832, "learning_rate": 0.00014285714285714287, "loss": 11.28, "step": 15 }, { "epoch": 0.00047004998198141735, "eval_loss": 10.960875511169434, "eval_runtime": 129.9537, "eval_samples_per_second": 103.398, "eval_steps_per_second": 51.703, "step": 15 }, { "epoch": 0.0005640599783777008, "grad_norm": 6.318249225616455, "learning_rate": 0.00017142857142857143, "loss": 10.8666, "step": 18 }, { "epoch": 0.0006267333093085565, "eval_loss": 10.430113792419434, "eval_runtime": 129.8947, "eval_samples_per_second": 103.445, "eval_steps_per_second": 51.727, "step": 20 }, { "epoch": 0.0006580699747739844, "grad_norm": 5.295856475830078, "learning_rate": 0.0002, "loss": 10.565, "step": 21 }, { "epoch": 0.0007520799711702678, "grad_norm": 4.21265172958374, "learning_rate": 0.00018660254037844388, "loss": 10.1523, "step": 24 }, { "epoch": 0.0007834166366356955, "eval_loss": 9.9229154586792, "eval_runtime": 130.5123, "eval_samples_per_second": 102.956, "eval_steps_per_second": 51.482, "step": 25 }, { "epoch": 0.0008460899675665512, "grad_norm": 4.139631271362305, "learning_rate": 0.00015000000000000001, "loss": 9.9544, "step": 27 }, { "epoch": 0.0009400999639628347, "grad_norm": 3.844069242477417, "learning_rate": 0.0001, "loss": 9.713, "step": 30 }, { "epoch": 0.0009400999639628347, "eval_loss": 9.596590995788574, "eval_runtime": 130.3595, "eval_samples_per_second": 103.076, "eval_steps_per_second": 51.542, "step": 30 }, { "epoch": 0.0010341099603591183, "grad_norm": 3.5350520610809326, "learning_rate": 5.000000000000002e-05, "loss": 9.5719, "step": 33 }, { "epoch": 0.0010967832912899737, "eval_loss": 9.454024314880371, "eval_runtime": 130.2285, "eval_samples_per_second": 103.18, "eval_steps_per_second": 51.594, "step": 35 }, { "epoch": 0.0011281199567554016, "grad_norm": 2.883392095565796, "learning_rate": 1.339745962155613e-05, "loss": 9.4912, "step": 36 }, { "epoch": 0.0012221299531516852, "grad_norm": 2.996534824371338, "learning_rate": 0.0, "loss": 9.4766, "step": 39 } ], "logging_steps": 3, "max_steps": 39, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 21, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5294156414976.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }