{ "best_metric": 2.7149553298950195, "best_model_checkpoint": "/home/datta0/models/lora_final/gemma-2-9b_pct_ortho/checkpoint-8", "epoch": 0.18543259557344063, "eval_steps": 8, "global_step": 72, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002575452716297787, "grad_norm": 6.355404376983643, "learning_rate": 1.25e-05, "loss": 2.1779, "step": 1 }, { "epoch": 0.010301810865191148, "grad_norm": 7.009632587432861, "learning_rate": 5e-05, "loss": 2.2793, "step": 4 }, { "epoch": 0.020603621730382295, "grad_norm": 5.262685298919678, "learning_rate": 0.0001, "loss": 2.3111, "step": 8 }, { "epoch": 0.020603621730382295, "eval_loss": 2.7149553298950195, "eval_runtime": 519.9082, "eval_samples_per_second": 0.471, "eval_steps_per_second": 0.471, "step": 8 }, { "epoch": 0.03090543259557344, "grad_norm": 8.695486068725586, "learning_rate": 9.997266286704631e-05, "loss": 6.4602, "step": 12 }, { "epoch": 0.04120724346076459, "grad_norm": 3.0442869663238525, "learning_rate": 9.989068136093873e-05, "loss": 11.4782, "step": 16 }, { "epoch": 0.04120724346076459, "eval_loss": 11.982385635375977, "eval_runtime": 308.4334, "eval_samples_per_second": 0.794, "eval_steps_per_second": 0.794, "step": 16 }, { "epoch": 0.05150905432595573, "grad_norm": 1.3675501346588135, "learning_rate": 9.975414512725057e-05, "loss": 11.9837, "step": 20 }, { "epoch": 0.06181086519114688, "grad_norm": 1.629292607307434, "learning_rate": 9.956320346634876e-05, "loss": 11.9866, "step": 24 }, { "epoch": 0.06181086519114688, "eval_loss": 12.02308464050293, "eval_runtime": 203.4649, "eval_samples_per_second": 1.204, "eval_steps_per_second": 1.204, "step": 24 }, { "epoch": 0.07211267605633803, "grad_norm": 0.635460615158081, "learning_rate": 9.931806517013612e-05, "loss": 12.0407, "step": 28 }, { "epoch": 0.08241448692152918, "grad_norm": 0.50620037317276, "learning_rate": 9.901899829374047e-05, "loss": 12.0022, "step": 32 }, { "epoch": 0.08241448692152918, "eval_loss": 12.044427871704102, "eval_runtime": 327.7452, "eval_samples_per_second": 0.748, "eval_steps_per_second": 0.748, "step": 32 }, { "epoch": 0.09271629778672032, "grad_norm": 0.3963007628917694, "learning_rate": 9.86663298624003e-05, "loss": 12.0261, "step": 36 }, { "epoch": 0.10301810865191147, "grad_norm": 0.23691882193088531, "learning_rate": 9.826044551386744e-05, "loss": 11.967, "step": 40 }, { "epoch": 0.10301810865191147, "eval_loss": 11.981504440307617, "eval_runtime": 120.2154, "eval_samples_per_second": 2.038, "eval_steps_per_second": 2.038, "step": 40 }, { "epoch": 0.11331991951710262, "grad_norm": 0.20406781136989594, "learning_rate": 9.780178907671789e-05, "loss": 11.9641, "step": 44 }, { "epoch": 0.12362173038229377, "grad_norm": 0.16647957265377045, "learning_rate": 9.729086208503174e-05, "loss": 11.9231, "step": 48 }, { "epoch": 0.12362173038229377, "eval_loss": 11.89189624786377, "eval_runtime": 163.7077, "eval_samples_per_second": 1.497, "eval_steps_per_second": 1.497, "step": 48 }, { "epoch": 0.1339235412474849, "grad_norm": 0.15248265862464905, "learning_rate": 9.672822322997305e-05, "loss": 11.8599, "step": 52 }, { "epoch": 0.14422535211267606, "grad_norm": 0.09781660884618759, "learning_rate": 9.611448774886924e-05, "loss": 11.804, "step": 56 }, { "epoch": 0.14422535211267606, "eval_loss": 11.80902099609375, "eval_runtime": 118.3727, "eval_samples_per_second": 2.07, "eval_steps_per_second": 2.07, "step": 56 }, { "epoch": 0.1545271629778672, "grad_norm": 0.2582569718360901, "learning_rate": 9.545032675245813e-05, "loss": 11.7282, "step": 60 }, { "epoch": 0.16482897384305836, "grad_norm": 0.15904435515403748, "learning_rate": 9.473646649103818e-05, "loss": 11.9348, "step": 64 }, { "epoch": 0.16482897384305836, "eval_loss": 11.950884819030762, "eval_runtime": 118.9665, "eval_samples_per_second": 2.059, "eval_steps_per_second": 2.059, "step": 64 }, { "epoch": 0.1751307847082495, "grad_norm": 0.12957070767879486, "learning_rate": 9.397368756032445e-05, "loss": 11.8253, "step": 68 }, { "epoch": 0.18543259557344063, "grad_norm": 0.13200531899929047, "learning_rate": 9.316282404787871e-05, "loss": 11.7656, "step": 72 }, { "epoch": 0.18543259557344063, "eval_loss": 11.661236763000488, "eval_runtime": 325.413, "eval_samples_per_second": 0.753, "eval_steps_per_second": 0.753, "step": 72 } ], "logging_steps": 4, "max_steps": 388, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 8, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0372092502440346e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }