{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 15950, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31347962382445144, "grad_norm": 0.9161905646324158, "learning_rate": 1.9840000000000003e-05, "loss": 5.5619, "step": 500 }, { "epoch": 0.6269592476489029, "grad_norm": 0.6389903426170349, "learning_rate": 1.9357928802589e-05, "loss": 1.7174, "step": 1000 }, { "epoch": 0.9404388714733543, "grad_norm": 0.5035005211830139, "learning_rate": 1.8710679611650487e-05, "loss": 1.6907, "step": 1500 }, { "epoch": 1.0, "eval_bleu": 12.658054871280777, "eval_loss": 1.6638020277023315, "eval_runtime": 246.1231, "eval_samples_per_second": 11.441, "eval_steps_per_second": 0.715, "step": 1595 }, { "epoch": 1.2539184952978055, "grad_norm": 0.5225879549980164, "learning_rate": 1.8063430420711977e-05, "loss": 1.6638, "step": 2000 }, { "epoch": 1.567398119122257, "grad_norm": 0.497714102268219, "learning_rate": 1.7416181229773465e-05, "loss": 1.6555, "step": 2500 }, { "epoch": 1.8808777429467085, "grad_norm": 0.5155704021453857, "learning_rate": 1.6768932038834952e-05, "loss": 1.6542, "step": 3000 }, { "epoch": 2.0, "eval_bleu": 13.064274040118429, "eval_loss": 1.6535193920135498, "eval_runtime": 263.0779, "eval_samples_per_second": 10.704, "eval_steps_per_second": 0.669, "step": 3190 }, { "epoch": 2.19435736677116, "grad_norm": 0.48234814405441284, "learning_rate": 1.612168284789644e-05, "loss": 1.6385, "step": 3500 }, { "epoch": 2.507836990595611, "grad_norm": 0.5366239547729492, "learning_rate": 1.547443365695793e-05, "loss": 1.6303, "step": 4000 }, { "epoch": 2.8213166144200628, "grad_norm": 0.4303179383277893, "learning_rate": 1.482718446601942e-05, "loss": 1.6291, "step": 4500 }, { "epoch": 3.0, "eval_bleu": 14.326650897191437, "eval_loss": 1.648271083831787, "eval_runtime": 238.3692, "eval_samples_per_second": 11.814, "eval_steps_per_second": 0.738, "step": 4785 }, { "epoch": 3.134796238244514, "grad_norm": 0.40259647369384766, "learning_rate": 1.4179935275080907e-05, "loss": 1.6216, "step": 5000 }, { "epoch": 3.4482758620689653, "grad_norm": 0.49842897057533264, "learning_rate": 1.3532686084142396e-05, "loss": 1.6111, "step": 5500 }, { "epoch": 3.761755485893417, "grad_norm": 0.4395376741886139, "learning_rate": 1.2885436893203884e-05, "loss": 1.6123, "step": 6000 }, { "epoch": 4.0, "eval_bleu": 14.710438534445657, "eval_loss": 1.6465048789978027, "eval_runtime": 229.3865, "eval_samples_per_second": 12.276, "eval_steps_per_second": 0.767, "step": 6380 }, { "epoch": 4.075235109717869, "grad_norm": 0.6033243536949158, "learning_rate": 1.2238187702265374e-05, "loss": 1.6092, "step": 6500 }, { "epoch": 4.38871473354232, "grad_norm": 0.541384220123291, "learning_rate": 1.1590938511326861e-05, "loss": 1.5984, "step": 7000 }, { "epoch": 4.702194357366771, "grad_norm": 0.42259281873703003, "learning_rate": 1.094368932038835e-05, "loss": 1.5995, "step": 7500 }, { "epoch": 5.0, "eval_bleu": 14.942770930991193, "eval_loss": 1.646419644355774, "eval_runtime": 231.9426, "eval_samples_per_second": 12.141, "eval_steps_per_second": 0.759, "step": 7975 }, { "epoch": 5.015673981191223, "grad_norm": 0.45884329080581665, "learning_rate": 1.0296440129449838e-05, "loss": 1.5988, "step": 8000 }, { "epoch": 5.329153605015674, "grad_norm": 0.3725912272930145, "learning_rate": 9.649190938511328e-06, "loss": 1.5874, "step": 8500 }, { "epoch": 5.6426332288401255, "grad_norm": 0.3701029419898987, "learning_rate": 9.001941747572817e-06, "loss": 1.5886, "step": 9000 }, { "epoch": 5.956112852664576, "grad_norm": 0.46482861042022705, "learning_rate": 8.354692556634304e-06, "loss": 1.5893, "step": 9500 }, { "epoch": 6.0, "eval_bleu": 14.678090282520607, "eval_loss": 1.6463446617126465, "eval_runtime": 231.5813, "eval_samples_per_second": 12.16, "eval_steps_per_second": 0.76, "step": 9570 }, { "epoch": 6.269592476489028, "grad_norm": 0.4049539864063263, "learning_rate": 7.707443365695793e-06, "loss": 1.5804, "step": 10000 }, { "epoch": 6.58307210031348, "grad_norm": 0.42662039399147034, "learning_rate": 7.060194174757282e-06, "loss": 1.5804, "step": 10500 }, { "epoch": 6.896551724137931, "grad_norm": 0.5612569451332092, "learning_rate": 6.412944983818771e-06, "loss": 1.5807, "step": 11000 }, { "epoch": 7.0, "eval_bleu": 15.24653511299691, "eval_loss": 1.6464687585830688, "eval_runtime": 228.543, "eval_samples_per_second": 12.322, "eval_steps_per_second": 0.77, "step": 11165 }, { "epoch": 7.210031347962382, "grad_norm": 0.4263598620891571, "learning_rate": 5.765695792880259e-06, "loss": 1.5754, "step": 11500 }, { "epoch": 7.523510971786834, "grad_norm": 0.39763760566711426, "learning_rate": 5.118446601941748e-06, "loss": 1.574, "step": 12000 }, { "epoch": 7.836990595611285, "grad_norm": 0.4740435779094696, "learning_rate": 4.471197411003236e-06, "loss": 1.5739, "step": 12500 }, { "epoch": 8.0, "eval_bleu": 15.692886975359345, "eval_loss": 1.6478267908096313, "eval_runtime": 228.9879, "eval_samples_per_second": 12.298, "eval_steps_per_second": 0.769, "step": 12760 }, { "epoch": 8.150470219435737, "grad_norm": 0.35859590768814087, "learning_rate": 3.823948220064725e-06, "loss": 1.571, "step": 13000 }, { "epoch": 8.463949843260188, "grad_norm": 0.32463109493255615, "learning_rate": 3.176699029126214e-06, "loss": 1.569, "step": 13500 }, { "epoch": 8.77742946708464, "grad_norm": 0.4066263437271118, "learning_rate": 2.5294498381877025e-06, "loss": 1.5694, "step": 14000 }, { "epoch": 9.0, "eval_bleu": 15.603090223915896, "eval_loss": 1.647833228111267, "eval_runtime": 226.5331, "eval_samples_per_second": 12.431, "eval_steps_per_second": 0.777, "step": 14355 }, { "epoch": 9.090909090909092, "grad_norm": 0.4182904362678528, "learning_rate": 1.882200647249191e-06, "loss": 1.568, "step": 14500 }, { "epoch": 9.404388714733543, "grad_norm": 0.45490360260009766, "learning_rate": 1.2349514563106797e-06, "loss": 1.5655, "step": 15000 }, { "epoch": 9.717868338557993, "grad_norm": 0.42171570658683777, "learning_rate": 5.877022653721683e-07, "loss": 1.5658, "step": 15500 }, { "epoch": 10.0, "eval_bleu": 15.780359854009903, "eval_loss": 1.6481457948684692, "eval_runtime": 230.3301, "eval_samples_per_second": 12.226, "eval_steps_per_second": 0.764, "step": 15950 } ], "logging_steps": 500, "max_steps": 15950, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3821793152270336e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }