{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9971671388101983, "eval_steps": 500, "global_step": 176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0056657223796034, "grad_norm": 23.928797485562242, "learning_rate": 5.555555555555555e-07, "loss": 1.3301, "step": 1 }, { "epoch": 0.028328611898016998, "grad_norm": 9.168045171641722, "learning_rate": 2.7777777777777783e-06, "loss": 1.2615, "step": 5 }, { "epoch": 0.056657223796033995, "grad_norm": 3.503193912103178, "learning_rate": 5.555555555555557e-06, "loss": 1.0082, "step": 10 }, { "epoch": 0.08498583569405099, "grad_norm": 2.4268821140139236, "learning_rate": 8.333333333333334e-06, "loss": 0.8627, "step": 15 }, { "epoch": 0.11331444759206799, "grad_norm": 2.510061349082153, "learning_rate": 9.99604698613651e-06, "loss": 0.8289, "step": 20 }, { "epoch": 0.141643059490085, "grad_norm": 2.5610840886165875, "learning_rate": 9.951647332362511e-06, "loss": 0.803, "step": 25 }, { "epoch": 0.16997167138810199, "grad_norm": 2.6655687674841735, "learning_rate": 9.85834670020205e-06, "loss": 0.7805, "step": 30 }, { "epoch": 0.19830028328611898, "grad_norm": 2.3795452616040698, "learning_rate": 9.717066498610673e-06, "loss": 0.7513, "step": 35 }, { "epoch": 0.22662889518413598, "grad_norm": 2.423822398896064, "learning_rate": 9.529201968327618e-06, "loss": 0.7295, "step": 40 }, { "epoch": 0.254957507082153, "grad_norm": 2.397535445659708, "learning_rate": 9.296608402898306e-06, "loss": 0.699, "step": 45 }, { "epoch": 0.28328611898017, "grad_norm": 2.717544074024275, "learning_rate": 9.021582826353825e-06, "loss": 0.6984, "step": 50 }, { "epoch": 0.311614730878187, "grad_norm": 2.5255796668705335, "learning_rate": 8.706841308493092e-06, "loss": 0.6602, "step": 55 }, { "epoch": 0.33994334277620397, "grad_norm": 2.0707712756959604, "learning_rate": 8.355492141795185e-06, "loss": 0.6637, "step": 60 }, { "epoch": 0.36827195467422097, "grad_norm": 2.2696475360730135, "learning_rate": 7.971005144858554e-06, "loss": 0.6346, "step": 65 }, { "epoch": 0.39660056657223797, "grad_norm": 2.605539930017495, "learning_rate": 7.5571773955171124e-06, "loss": 0.625, "step": 70 }, { "epoch": 0.42492917847025496, "grad_norm": 2.291369766178497, "learning_rate": 7.118095732042643e-06, "loss": 0.5957, "step": 75 }, { "epoch": 0.45325779036827196, "grad_norm": 2.3308971412073367, "learning_rate": 6.65809639276034e-06, "loss": 0.608, "step": 80 }, { "epoch": 0.48158640226628896, "grad_norm": 2.3052197500819758, "learning_rate": 6.181722192664526e-06, "loss": 0.5965, "step": 85 }, { "epoch": 0.509915014164306, "grad_norm": 2.186441360279387, "learning_rate": 5.693677659945343e-06, "loss": 0.5714, "step": 90 }, { "epoch": 0.5382436260623229, "grad_norm": 2.0459304023279654, "learning_rate": 5.19878257548463e-06, "loss": 0.571, "step": 95 }, { "epoch": 0.56657223796034, "grad_norm": 2.111785511247743, "learning_rate": 4.701924374150901e-06, "loss": 0.5575, "step": 100 }, { "epoch": 0.5949008498583569, "grad_norm": 2.0553285107851336, "learning_rate": 4.2080098779639255e-06, "loss": 0.5573, "step": 105 }, { "epoch": 0.623229461756374, "grad_norm": 2.058760374639204, "learning_rate": 3.721916837797627e-06, "loss": 0.5227, "step": 110 }, { "epoch": 0.6515580736543909, "grad_norm": 2.204312230150294, "learning_rate": 3.2484457621808787e-06, "loss": 0.5294, "step": 115 }, { "epoch": 0.6798866855524079, "grad_norm": 1.9646053728129644, "learning_rate": 2.792272508920443e-06, "loss": 0.5231, "step": 120 }, { "epoch": 0.7082152974504249, "grad_norm": 2.2039971826045304, "learning_rate": 2.3579021077369047e-06, "loss": 0.5124, "step": 125 }, { "epoch": 0.7365439093484419, "grad_norm": 2.173776740721594, "learning_rate": 1.949624269947378e-06, "loss": 0.5126, "step": 130 }, { "epoch": 0.7648725212464589, "grad_norm": 2.03790957114551, "learning_rate": 1.5714710245679348e-06, "loss": 0.4943, "step": 135 }, { "epoch": 0.7932011331444759, "grad_norm": 1.985721888347746, "learning_rate": 1.227176899208849e-06, "loss": 0.4883, "step": 140 }, { "epoch": 0.8215297450424929, "grad_norm": 2.1787623191912737, "learning_rate": 9.201420390041965e-07, "loss": 0.4987, "step": 145 }, { "epoch": 0.8498583569405099, "grad_norm": 2.059217292081842, "learning_rate": 6.533986278020876e-07, "loss": 0.4829, "step": 150 }, { "epoch": 0.8781869688385269, "grad_norm": 2.0422032671448997, "learning_rate": 4.2958094322982703e-07, "loss": 0.4962, "step": 155 }, { "epoch": 0.9065155807365439, "grad_norm": 2.0055105526222974, "learning_rate": 2.5089934136108665e-07, "loss": 0.4896, "step": 160 }, { "epoch": 0.9348441926345609, "grad_norm": 1.9559939941635416, "learning_rate": 1.1911842790474637e-07, "loss": 0.4703, "step": 165 }, { "epoch": 0.9631728045325779, "grad_norm": 2.008224762812249, "learning_rate": 3.553963149013295e-08, "loss": 0.4793, "step": 170 }, { "epoch": 0.9915014164305949, "grad_norm": 2.117489119782366, "learning_rate": 9.883511496722176e-10, "loss": 0.4739, "step": 175 }, { "epoch": 0.9971671388101983, "eval_loss": 0.6540379524230957, "eval_runtime": 83.5061, "eval_samples_per_second": 3.617, "eval_steps_per_second": 0.91, "step": 176 }, { "epoch": 0.9971671388101983, "step": 176, "total_flos": 36798474485760.0, "train_loss": 0.6290039020505819, "train_runtime": 6093.0087, "train_samples_per_second": 0.926, "train_steps_per_second": 0.029 } ], "logging_steps": 5, "max_steps": 176, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 36798474485760.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }