{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 28712, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01741432153803288, "grad_norm": 9.577305793762207, "learning_rate": 1.160092807424594e-05, "loss": 3.1831, "step": 500 }, { "epoch": 0.03482864307606576, "grad_norm": 12.14001178741455, "learning_rate": 1.9900897666068224e-05, "loss": 2.7736, "step": 1000 }, { "epoch": 0.052242964614098636, "grad_norm": 13.129646301269531, "learning_rate": 1.9541831238779175e-05, "loss": 2.6251, "step": 1500 }, { "epoch": 0.06965728615213151, "grad_norm": 13.319927215576172, "learning_rate": 1.9182764811490127e-05, "loss": 2.685, "step": 2000 }, { "epoch": 0.0870716076901644, "grad_norm": 9.90979290008545, "learning_rate": 1.882369838420108e-05, "loss": 2.4835, "step": 2500 }, { "epoch": 0.10448592922819727, "grad_norm": 7.106535911560059, "learning_rate": 1.846463195691203e-05, "loss": 2.4759, "step": 3000 }, { "epoch": 0.12190025076623015, "grad_norm": 11.579163551330566, "learning_rate": 1.8105565529622982e-05, "loss": 2.4615, "step": 3500 }, { "epoch": 0.13931457230426303, "grad_norm": 8.479988098144531, "learning_rate": 1.7746499102333934e-05, "loss": 2.5471, "step": 4000 }, { "epoch": 0.1567288938422959, "grad_norm": 6.432307720184326, "learning_rate": 1.7387432675044886e-05, "loss": 2.4572, "step": 4500 }, { "epoch": 0.1741432153803288, "grad_norm": 10.89651870727539, "learning_rate": 1.7028366247755838e-05, "loss": 2.4725, "step": 5000 }, { "epoch": 0.19155753691836166, "grad_norm": 8.298269271850586, "learning_rate": 1.6669299820466786e-05, "loss": 2.4184, "step": 5500 }, { "epoch": 0.20897185845639454, "grad_norm": 3.4620511531829834, "learning_rate": 1.6310233393177738e-05, "loss": 2.3301, "step": 6000 }, { "epoch": 0.22638617999442742, "grad_norm": 5.399957180023193, "learning_rate": 1.5951166965888693e-05, "loss": 2.4545, "step": 6500 }, { "epoch": 0.2438005015324603, "grad_norm": 8.993759155273438, "learning_rate": 1.559210053859964e-05, "loss": 2.3337, "step": 7000 }, { "epoch": 0.26121482307049315, "grad_norm": 3.5916497707366943, "learning_rate": 1.5233034111310595e-05, "loss": 2.4419, "step": 7500 }, { "epoch": 0.27862914460852606, "grad_norm": 8.653467178344727, "learning_rate": 1.4873967684021545e-05, "loss": 2.3755, "step": 8000 }, { "epoch": 0.2960434661465589, "grad_norm": 6.519212245941162, "learning_rate": 1.4514901256732496e-05, "loss": 2.3601, "step": 8500 }, { "epoch": 0.3134577876845918, "grad_norm": 8.016100883483887, "learning_rate": 1.4155834829443448e-05, "loss": 2.2321, "step": 9000 }, { "epoch": 0.33087210922262467, "grad_norm": 16.313932418823242, "learning_rate": 1.37967684021544e-05, "loss": 2.3488, "step": 9500 }, { "epoch": 0.3482864307606576, "grad_norm": 28.44437026977539, "learning_rate": 1.3437701974865352e-05, "loss": 2.3364, "step": 10000 }, { "epoch": 0.3657007522986904, "grad_norm": 3.504075527191162, "learning_rate": 1.3078635547576302e-05, "loss": 2.2997, "step": 10500 }, { "epoch": 0.38311507383672333, "grad_norm": 11.785683631896973, "learning_rate": 1.2719569120287253e-05, "loss": 2.3374, "step": 11000 }, { "epoch": 0.4005293953747562, "grad_norm": 9.7432861328125, "learning_rate": 1.2360502692998207e-05, "loss": 2.266, "step": 11500 }, { "epoch": 0.4179437169127891, "grad_norm": 8.663525581359863, "learning_rate": 1.2001436265709157e-05, "loss": 2.2722, "step": 12000 }, { "epoch": 0.43535803845082194, "grad_norm": 
8.145735740661621, "learning_rate": 1.1642369838420109e-05, "loss": 2.3819, "step": 12500 }, { "epoch": 0.45277235998885484, "grad_norm": 6.875612258911133, "learning_rate": 1.1283303411131059e-05, "loss": 2.3541, "step": 13000 }, { "epoch": 0.4701866815268877, "grad_norm": 4.973703861236572, "learning_rate": 1.0924236983842012e-05, "loss": 2.3205, "step": 13500 }, { "epoch": 0.4876010030649206, "grad_norm": 8.985243797302246, "learning_rate": 1.0565170556552964e-05, "loss": 2.3118, "step": 14000 }, { "epoch": 0.5050153246029535, "grad_norm": 3.0293591022491455, "learning_rate": 1.0206104129263914e-05, "loss": 2.2947, "step": 14500 }, { "epoch": 0.5224296461409863, "grad_norm": 11.419426918029785, "learning_rate": 9.847037701974866e-06, "loss": 2.2276, "step": 15000 }, { "epoch": 0.5398439676790192, "grad_norm": 9.195966720581055, "learning_rate": 9.487971274685817e-06, "loss": 2.333, "step": 15500 }, { "epoch": 0.5572582892170521, "grad_norm": 7.067108631134033, "learning_rate": 9.128904847396769e-06, "loss": 2.3562, "step": 16000 }, { "epoch": 0.574672610755085, "grad_norm": 5.443989276885986, "learning_rate": 8.769838420107721e-06, "loss": 2.3378, "step": 16500 }, { "epoch": 0.5920869322931178, "grad_norm": 10.031864166259766, "learning_rate": 8.410771992818673e-06, "loss": 2.3233, "step": 17000 }, { "epoch": 0.6095012538311507, "grad_norm": 7.518855094909668, "learning_rate": 8.051705565529624e-06, "loss": 2.2783, "step": 17500 }, { "epoch": 0.6269155753691836, "grad_norm": 8.214040756225586, "learning_rate": 7.692639138240574e-06, "loss": 2.1764, "step": 18000 }, { "epoch": 0.6443298969072165, "grad_norm": 10.006634712219238, "learning_rate": 7.333572710951526e-06, "loss": 2.3743, "step": 18500 }, { "epoch": 0.6617442184452493, "grad_norm": 8.866910934448242, "learning_rate": 6.974506283662478e-06, "loss": 2.3044, "step": 19000 }, { "epoch": 0.6791585399832822, "grad_norm": 5.549246311187744, "learning_rate": 6.6154398563734305e-06, "loss": 2.2693, "step": 19500 }, { "epoch": 0.6965728615213151, "grad_norm": 4.0343194007873535, "learning_rate": 6.2563734290843814e-06, "loss": 2.1788, "step": 20000 }, { "epoch": 0.713987183059348, "grad_norm": 6.325515270233154, "learning_rate": 5.897307001795332e-06, "loss": 2.2589, "step": 20500 }, { "epoch": 0.7314015045973808, "grad_norm": 5.234256267547607, "learning_rate": 5.538240574506284e-06, "loss": 2.1563, "step": 21000 }, { "epoch": 0.7488158261354138, "grad_norm": 8.443244934082031, "learning_rate": 5.179174147217235e-06, "loss": 2.2293, "step": 21500 }, { "epoch": 0.7662301476734467, "grad_norm": 3.8823511600494385, "learning_rate": 4.820107719928187e-06, "loss": 2.2258, "step": 22000 }, { "epoch": 0.7836444692114796, "grad_norm": 6.5127034187316895, "learning_rate": 4.4610412926391385e-06, "loss": 2.1272, "step": 22500 }, { "epoch": 0.8010587907495124, "grad_norm": 12.666552543640137, "learning_rate": 4.10197486535009e-06, "loss": 2.264, "step": 23000 }, { "epoch": 0.8184731122875453, "grad_norm": 5.594479084014893, "learning_rate": 3.7429084380610415e-06, "loss": 2.2525, "step": 23500 }, { "epoch": 0.8358874338255782, "grad_norm": 5.7376179695129395, "learning_rate": 3.3838420107719933e-06, "loss": 2.3071, "step": 24000 }, { "epoch": 0.8533017553636111, "grad_norm": 12.593650817871094, "learning_rate": 3.0247755834829446e-06, "loss": 2.2623, "step": 24500 }, { "epoch": 0.8707160769016439, "grad_norm": 10.175933837890625, "learning_rate": 2.6657091561938963e-06, "loss": 2.2561, "step": 25000 }, { "epoch": 0.8881303984396768, "grad_norm": 
11.089727401733398, "learning_rate": 2.3066427289048477e-06, "loss": 2.2185, "step": 25500 }, { "epoch": 0.9055447199777097, "grad_norm": 7.090406894683838, "learning_rate": 1.947576301615799e-06, "loss": 2.204, "step": 26000 }, { "epoch": 0.9229590415157426, "grad_norm": 16.22230339050293, "learning_rate": 1.5885098743267505e-06, "loss": 2.2285, "step": 26500 }, { "epoch": 0.9403733630537754, "grad_norm": 4.419189929962158, "learning_rate": 1.229443447037702e-06, "loss": 2.2739, "step": 27000 }, { "epoch": 0.9577876845918083, "grad_norm": 12.452730178833008, "learning_rate": 8.703770197486536e-07, "loss": 2.1793, "step": 27500 }, { "epoch": 0.9752020061298412, "grad_norm": 19.379770278930664, "learning_rate": 5.11310592459605e-07, "loss": 2.1586, "step": 28000 }, { "epoch": 0.9926163276678741, "grad_norm": 19.553585052490234, "learning_rate": 1.5224416517055656e-07, "loss": 2.1482, "step": 28500 }, { "epoch": 1.0, "step": 28712, "total_flos": 0.0, "train_loss": 2.343143721416024, "train_runtime": 98844.7919, "train_samples_per_second": 0.581, "train_steps_per_second": 0.29 } ], "logging_steps": 500, "max_steps": 28712, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }
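The object above appears to be a Hugging Face Transformers trainer_state.json: "log_history" holds one entry per logging step (every 500 steps here) plus a final run-summary entry, and the remaining keys record the run configuration and callback state. A minimal Python sketch for inspecting it follows; the file name "trainer_state.json" and the use of matplotlib are assumptions for illustration, not part of the original log.

# Minimal sketch: load the trainer state above and plot training loss vs. step.
# Assumptions (not from the original log): the JSON is saved as
# "trainer_state.json" in the working directory, and matplotlib is installed.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step logging entries; the last log_history entry is a
# run summary (train_loss, train_runtime, ...) and has no "loss" key.
entries = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title(f"loss over {state['global_step']} steps")
plt.show()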