{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.13131545254587834, "eval_steps": 1000, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0065657726272939166, "grad_norm": 0.20985035598278046, "learning_rate": 1.9875205254515602e-05, "loss": 1.2854, "step": 100 }, { "epoch": 0.0065657726272939166, "eval_loss": 1.0137826204299927, "eval_runtime": 30083.3012, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 100 }, { "epoch": 0.013131545254587833, "grad_norm": 0.25870802998542786, "learning_rate": 1.9743842364532024e-05, "loss": 0.9595, "step": 200 }, { "epoch": 0.013131545254587833, "eval_loss": 0.9594874382019043, "eval_runtime": 30081.6328, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 200 }, { "epoch": 0.01969731788188175, "grad_norm": 0.22705765068531036, "learning_rate": 1.961247947454844e-05, "loss": 0.9371, "step": 300 }, { "epoch": 0.01969731788188175, "eval_loss": 0.943973183631897, "eval_runtime": 30082.9648, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 300 }, { "epoch": 0.026263090509175666, "grad_norm": 0.15589605271816254, "learning_rate": 1.9481116584564863e-05, "loss": 0.9386, "step": 400 }, { "epoch": 0.026263090509175666, "eval_loss": 0.9333989024162292, "eval_runtime": 30081.8754, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 400 }, { "epoch": 0.032828863136469585, "grad_norm": 0.3671300411224365, "learning_rate": 1.934975369458128e-05, "loss": 0.9166, "step": 500 }, { "epoch": 0.032828863136469585, "eval_loss": 0.926673173904419, "eval_runtime": 30080.758, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 500 }, { "epoch": 0.0393946357637635, "grad_norm": 0.23869025707244873, "learning_rate": 1.9218390804597703e-05, "loss": 0.9236, "step": 600 }, { "epoch": 0.0393946357637635, "eval_loss": 0.9226961135864258, "eval_runtime": 30048.7574, "eval_samples_per_second": 0.427, "eval_steps_per_second": 0.053, "step": 600 }, { "epoch": 0.04596040839105742, "grad_norm": 0.27560362219810486, "learning_rate": 1.9087027914614124e-05, "loss": 0.9083, "step": 700 }, { "epoch": 0.04596040839105742, "eval_loss": 0.9187578558921814, "eval_runtime": 30068.8429, "eval_samples_per_second": 0.427, "eval_steps_per_second": 0.053, "step": 700 }, { "epoch": 0.05252618101835133, "grad_norm": 0.2736820578575134, "learning_rate": 1.8955665024630542e-05, "loss": 0.929, "step": 800 }, { "epoch": 0.05252618101835133, "eval_loss": 0.9152230620384216, "eval_runtime": 30083.9319, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 800 }, { "epoch": 0.05909195364564525, "grad_norm": 0.2768559455871582, "learning_rate": 1.8824302134646964e-05, "loss": 0.9077, "step": 900 }, { "epoch": 0.05909195364564525, "eval_loss": 0.9131888747215271, "eval_runtime": 30078.968, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 900 }, { "epoch": 0.06565772627293917, "grad_norm": 0.29504087567329407, "learning_rate": 1.8692939244663385e-05, "loss": 0.9018, "step": 1000 }, { "epoch": 0.06565772627293917, "eval_loss": 0.9111798405647278, "eval_runtime": 30085.2411, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1000 }, { "epoch": 0.07222349890023308, "grad_norm": 0.32929274439811707, "learning_rate": 1.8561576354679803e-05, "loss": 0.9113, "step": 1100 }, { "epoch": 0.07222349890023308, "eval_loss": 0.9082886576652527, "eval_runtime": 30080.1756, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1100 }, { "epoch": 0.078789271527527, "grad_norm": 0.3265211582183838, "learning_rate": 1.8430213464696225e-05, "loss": 0.8797, "step": 1200 }, { "epoch": 0.078789271527527, "eval_loss": 0.9068158864974976, "eval_runtime": 30079.3778, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1200 }, { "epoch": 0.08535504415482092, "grad_norm": 0.338413268327713, "learning_rate": 1.8298850574712646e-05, "loss": 0.9187, "step": 1300 }, { "epoch": 0.08535504415482092, "eval_loss": 0.9051068425178528, "eval_runtime": 30077.7335, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1300 }, { "epoch": 0.09192081678211483, "grad_norm": 0.28864777088165283, "learning_rate": 1.8167487684729067e-05, "loss": 0.893, "step": 1400 }, { "epoch": 0.09192081678211483, "eval_loss": 0.9037203192710876, "eval_runtime": 30076.794, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1400 }, { "epoch": 0.09848658940940876, "grad_norm": 0.3874276578426361, "learning_rate": 1.8036124794745485e-05, "loss": 0.897, "step": 1500 }, { "epoch": 0.09848658940940876, "eval_loss": 0.9024273753166199, "eval_runtime": 30075.0565, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1500 }, { "epoch": 0.10505236203670267, "grad_norm": 0.395245760679245, "learning_rate": 1.7904761904761907e-05, "loss": 0.8966, "step": 1600 }, { "epoch": 0.10505236203670267, "eval_loss": 0.9006879329681396, "eval_runtime": 30076.5102, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1600 }, { "epoch": 0.11161813466399659, "grad_norm": 0.36763593554496765, "learning_rate": 1.777339901477833e-05, "loss": 0.9013, "step": 1700 }, { "epoch": 0.11161813466399659, "eval_loss": 0.8995980620384216, "eval_runtime": 30075.5407, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1700 }, { "epoch": 0.1181839072912905, "grad_norm": 0.3429044783115387, "learning_rate": 1.7642036124794746e-05, "loss": 0.887, "step": 1800 }, { "epoch": 0.1181839072912905, "eval_loss": 0.8988845944404602, "eval_runtime": 30075.9991, "eval_samples_per_second": 0.426, "eval_steps_per_second": 0.053, "step": 1800 }, { "epoch": 0.12474967991858442, "grad_norm": 0.41019654273986816, "learning_rate": 1.7510673234811168e-05, "loss": 0.8988, "step": 1900 }, { "epoch": 0.12474967991858442, "eval_loss": 0.8975555300712585, "eval_runtime": 30071.8456, "eval_samples_per_second": 0.427, "eval_steps_per_second": 0.053, "step": 1900 }, { "epoch": 0.13131545254587834, "grad_norm": 0.36183497309684753, "learning_rate": 1.7379310344827586e-05, "loss": 0.8733, "step": 2000 }, { "epoch": 0.13131545254587834, "eval_loss": 0.8967778086662292, "eval_runtime": 30071.3584, "eval_samples_per_second": 0.427, "eval_steps_per_second": 0.053, "step": 2000 } ], "logging_steps": 1000, "max_steps": 15230, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0606252120788173e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }