{ "best_metric": 1.2975215911865234, "best_model_checkpoint": "/home/datta0/models/lora_final/gemma-2-9b_magiccoder_ortho/checkpoint-4", "epoch": 0.3135653771562723, "eval_steps": 4, "global_step": 48, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006532612024089007, "grad_norm": 5.831324577331543, "learning_rate": 2.5e-05, "loss": 1.3342, "step": 1 }, { "epoch": 0.013065224048178014, "grad_norm": 5.2910685539245605, "learning_rate": 5e-05, "loss": 1.2199, "step": 2 }, { "epoch": 0.026130448096356028, "grad_norm": 3.488215684890747, "learning_rate": 0.0001, "loss": 1.2682, "step": 4 }, { "epoch": 0.026130448096356028, "eval_loss": 1.2975215911865234, "eval_runtime": 866.0894, "eval_samples_per_second": 0.565, "eval_steps_per_second": 0.565, "step": 4 }, { "epoch": 0.03919567214453404, "grad_norm": 3.667466163635254, "learning_rate": 9.995555091232516e-05, "loss": 1.3348, "step": 6 }, { "epoch": 0.052260896192712056, "grad_norm": 8.944449424743652, "learning_rate": 9.982228267815643e-05, "loss": 1.6701, "step": 8 }, { "epoch": 0.052260896192712056, "eval_loss": 4.235434532165527, "eval_runtime": 252.6713, "eval_samples_per_second": 1.935, "eval_steps_per_second": 1.935, "step": 8 }, { "epoch": 0.06532612024089007, "grad_norm": 8.121936798095703, "learning_rate": 9.96004322435508e-05, "loss": 5.8059, "step": 10 }, { "epoch": 0.07839134428906808, "grad_norm": 1.232266902923584, "learning_rate": 9.929039405048501e-05, "loss": 10.2087, "step": 12 }, { "epoch": 0.07839134428906808, "eval_loss": 11.443126678466797, "eval_runtime": 295.202, "eval_samples_per_second": 1.656, "eval_steps_per_second": 1.656, "step": 12 }, { "epoch": 0.0914565683372461, "grad_norm": 20.134733200073242, "learning_rate": 9.889271933555213e-05, "loss": 11.4156, "step": 14 }, { "epoch": 0.10452179238542411, "grad_norm": 1.036009669303894, "learning_rate": 9.840811514988294e-05, "loss": 11.4501, "step": 16 }, { "epoch": 0.10452179238542411, "eval_loss": 11.910863876342773, "eval_runtime": 288.3269, "eval_samples_per_second": 1.696, "eval_steps_per_second": 1.696, "step": 16 }, { "epoch": 0.11758701643360213, "grad_norm": 0.3910887539386749, "learning_rate": 9.783744310203491e-05, "loss": 11.9102, "step": 18 }, { "epoch": 0.13065224048178015, "grad_norm": 0.7469679713249207, "learning_rate": 9.718171782608356e-05, "loss": 11.8815, "step": 20 }, { "epoch": 0.13065224048178015, "eval_loss": 11.953683853149414, "eval_runtime": 513.8765, "eval_samples_per_second": 0.952, "eval_steps_per_second": 0.952, "step": 20 }, { "epoch": 0.14371746452995815, "grad_norm": 0.6554788947105408, "learning_rate": 9.644210517764014e-05, "loss": 11.9806, "step": 22 }, { "epoch": 0.15678268857813615, "grad_norm": 0.6563255786895752, "learning_rate": 9.561992016100293e-05, "loss": 12.0113, "step": 24 }, { "epoch": 0.15678268857813615, "eval_loss": 11.999762535095215, "eval_runtime": 627.957, "eval_samples_per_second": 0.779, "eval_steps_per_second": 0.779, "step": 24 }, { "epoch": 0.16984791262631418, "grad_norm": 0.5697500109672546, "learning_rate": 9.471662459112747e-05, "loss": 11.9923, "step": 26 }, { "epoch": 0.1829131366744922, "grad_norm": 0.4122526943683624, "learning_rate": 9.373382449457304e-05, "loss": 11.9021, "step": 28 }, { "epoch": 0.1829131366744922, "eval_loss": 11.957951545715332, "eval_runtime": 666.2787, "eval_samples_per_second": 0.734, "eval_steps_per_second": 0.734, "step": 28 }, { "epoch": 0.1959783607226702, "grad_norm": 0.15934322774410248, "learning_rate": 9.267326725404599e-05, "loss": 11.9592, "step": 30 }, { "epoch": 0.20904358477084822, "grad_norm": 0.49380967020988464, "learning_rate": 9.153683850161706e-05, "loss": 11.9273, "step": 32 }, { "epoch": 0.20904358477084822, "eval_loss": 11.900312423706055, "eval_runtime": 678.6572, "eval_samples_per_second": 0.721, "eval_steps_per_second": 0.721, "step": 32 }, { "epoch": 0.22210880881902623, "grad_norm": 0.49335020780563354, "learning_rate": 9.032655876613636e-05, "loss": 11.8792, "step": 34 }, { "epoch": 0.23517403286720426, "grad_norm": 0.333467960357666, "learning_rate": 8.904457988080681e-05, "loss": 11.833, "step": 36 }, { "epoch": 0.23517403286720426, "eval_loss": 11.815201759338379, "eval_runtime": 652.6748, "eval_samples_per_second": 0.749, "eval_steps_per_second": 0.749, "step": 36 }, { "epoch": 0.24823925691538226, "grad_norm": 0.2453756183385849, "learning_rate": 8.76931811573033e-05, "loss": 11.8354, "step": 38 }, { "epoch": 0.2613044809635603, "grad_norm": 0.22134803235530853, "learning_rate": 8.627476533323957e-05, "loss": 11.7118, "step": 40 }, { "epoch": 0.2613044809635603, "eval_loss": 11.792489051818848, "eval_runtime": 659.3874, "eval_samples_per_second": 0.742, "eval_steps_per_second": 0.742, "step": 40 }, { "epoch": 0.2743697050117383, "grad_norm": 0.22551162540912628, "learning_rate": 8.479185430018858e-05, "loss": 11.7013, "step": 42 }, { "epoch": 0.2874349290599163, "grad_norm": 0.24199481308460236, "learning_rate": 8.324708461985124e-05, "loss": 11.5874, "step": 44 }, { "epoch": 0.2874349290599163, "eval_loss": 11.56563663482666, "eval_runtime": 557.8414, "eval_samples_per_second": 0.877, "eval_steps_per_second": 0.877, "step": 44 }, { "epoch": 0.3005001531080943, "grad_norm": 0.1317184865474701, "learning_rate": 8.164320283634585e-05, "loss": 11.5482, "step": 46 }, { "epoch": 0.3135653771562723, "grad_norm": 0.183132141828537, "learning_rate": 7.998306059295301e-05, "loss": 11.4794, "step": 48 }, { "epoch": 0.3135653771562723, "eval_loss": 11.47742748260498, "eval_runtime": 674.8709, "eval_samples_per_second": 0.725, "eval_steps_per_second": 0.725, "step": 48 } ], "logging_steps": 2, "max_steps": 153, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.33894962284462e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }