{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.09090909090909, "eval_steps": 500, "global_step": 31, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 2.495208501815796, "eval_runtime": 2.7046, "eval_samples_per_second": 86.518, "eval_steps_per_second": 2.958, "step": 0 }, { "epoch": 1.09, "learning_rate": 0.0002999814948722491, "loss": 2.5615, "step": 1 }, { "epoch": 1.09, "eval_loss": 2.527021646499634, "eval_runtime": 2.6357, "eval_samples_per_second": 88.78, "eval_steps_per_second": 3.035, "step": 1 }, { "epoch": 1.09, "eval_loss": 2.5362284183502197, "eval_runtime": 2.6589, "eval_samples_per_second": 88.005, "eval_steps_per_second": 3.009, "step": 1 }, { "epoch": 3.03, "eval_loss": 2.5341787338256836, "eval_runtime": 2.6589, "eval_samples_per_second": 88.007, "eval_steps_per_second": 3.009, "step": 2 }, { "epoch": 4.12, "eval_loss": 2.2734625339508057, "eval_runtime": 2.6648, "eval_samples_per_second": 87.812, "eval_steps_per_second": 3.002, "step": 3 }, { "epoch": 4.12, "eval_loss": 2.3209266662597656, "eval_runtime": 2.6531, "eval_samples_per_second": 88.198, "eval_steps_per_second": 3.015, "step": 3 }, { "epoch": 6.06, "eval_loss": 2.1017019748687744, "eval_runtime": 2.6605, "eval_samples_per_second": 87.954, "eval_steps_per_second": 3.007, "step": 4 }, { "epoch": 7.15, "learning_rate": 0.00029953760005996916, "loss": 2.363, "step": 5 }, { "epoch": 7.15, "eval_loss": 2.012136697769165, "eval_runtime": 2.6573, "eval_samples_per_second": 88.061, "eval_steps_per_second": 3.011, "step": 5 }, { "epoch": 7.15, "eval_loss": 2.0751442909240723, "eval_runtime": 2.6638, "eval_samples_per_second": 87.844, "eval_steps_per_second": 3.003, "step": 5 }, { "epoch": 9.09, "eval_loss": 1.964595079421997, "eval_runtime": 2.7098, "eval_samples_per_second": 86.353, "eval_steps_per_second": 2.952, "step": 6 }, { "epoch": 9.09, "eval_loss": 1.8911688327789307, "eval_runtime": 2.6643, "eval_samples_per_second": 87.829, "eval_steps_per_second": 3.003, "step": 6 }, { "epoch": 11.03, "eval_loss": 1.809972882270813, "eval_runtime": 2.6547, "eval_samples_per_second": 88.146, "eval_steps_per_second": 3.014, "step": 7 }, { "epoch": 12.12, "eval_loss": 1.8143646717071533, "eval_runtime": 2.6683, "eval_samples_per_second": 87.697, "eval_steps_per_second": 2.998, "step": 8 }, { "epoch": 12.12, "eval_loss": 1.7983335256576538, "eval_runtime": 2.6503, "eval_samples_per_second": 88.291, "eval_steps_per_second": 3.018, "step": 8 }, { "epoch": 14.06, "eval_loss": 1.7633870840072632, "eval_runtime": 2.6612, "eval_samples_per_second": 87.931, "eval_steps_per_second": 3.006, "step": 9 }, { "epoch": 15.15, "learning_rate": 0.00029815325108927063, "loss": 1.9009, "step": 10 }, { "epoch": 15.15, "eval_loss": 1.762792706489563, "eval_runtime": 2.6498, "eval_samples_per_second": 88.31, "eval_steps_per_second": 3.019, "step": 10 }, { "epoch": 15.15, "eval_loss": 1.7354298830032349, "eval_runtime": 2.6595, "eval_samples_per_second": 87.986, "eval_steps_per_second": 3.008, "step": 10 }, { "epoch": 17.09, "eval_loss": 1.7343316078186035, "eval_runtime": 2.6543, "eval_samples_per_second": 88.159, "eval_steps_per_second": 3.014, "step": 11 }, { "epoch": 17.09, "eval_loss": 1.7231522798538208, "eval_runtime": 2.6679, "eval_samples_per_second": 87.709, "eval_steps_per_second": 2.999, "step": 11 }, { "epoch": 19.03, "eval_loss": 1.6737045049667358, "eval_runtime": 2.6731, "eval_samples_per_second": 87.538, "eval_steps_per_second": 2.993, "step": 12 }, { "epoch": 20.12, "eval_loss": 1.6417571306228638, "eval_runtime": 2.6611, "eval_samples_per_second": 87.935, "eval_steps_per_second": 3.006, "step": 13 }, { "epoch": 20.12, "eval_loss": 1.663546085357666, "eval_runtime": 2.7084, "eval_samples_per_second": 86.399, "eval_steps_per_second": 2.954, "step": 13 }, { "epoch": 22.06, "eval_loss": 1.6280120611190796, "eval_runtime": 2.6541, "eval_samples_per_second": 88.166, "eval_steps_per_second": 3.014, "step": 14 }, { "epoch": 23.15, "learning_rate": 0.0002958554880596515, "loss": 1.7031, "step": 15 }, { "epoch": 23.15, "eval_loss": 1.6042001247406006, "eval_runtime": 2.6431, "eval_samples_per_second": 88.533, "eval_steps_per_second": 3.027, "step": 15 }, { "epoch": 23.15, "eval_loss": 1.6120343208312988, "eval_runtime": 2.6568, "eval_samples_per_second": 88.076, "eval_steps_per_second": 3.011, "step": 15 }, { "epoch": 25.09, "eval_loss": 1.579213261604309, "eval_runtime": 2.6609, "eval_samples_per_second": 87.94, "eval_steps_per_second": 3.007, "step": 16 }, { "epoch": 25.09, "eval_loss": 1.6127510070800781, "eval_runtime": 2.6566, "eval_samples_per_second": 88.082, "eval_steps_per_second": 3.011, "step": 16 }, { "epoch": 27.03, "eval_loss": 1.5467751026153564, "eval_runtime": 2.655, "eval_samples_per_second": 88.136, "eval_steps_per_second": 3.013, "step": 17 }, { "epoch": 28.12, "eval_loss": 1.530348539352417, "eval_runtime": 2.6531, "eval_samples_per_second": 88.197, "eval_steps_per_second": 3.015, "step": 18 }, { "epoch": 28.12, "eval_loss": 1.5159918069839478, "eval_runtime": 2.6518, "eval_samples_per_second": 88.241, "eval_steps_per_second": 3.017, "step": 18 }, { "epoch": 30.06, "eval_loss": 1.5194865465164185, "eval_runtime": 2.6595, "eval_samples_per_second": 87.987, "eval_steps_per_second": 3.008, "step": 19 }, { "epoch": 31.15, "learning_rate": 0.00029265847744427303, "loss": 1.5968, "step": 20 }, { "epoch": 31.15, "eval_loss": 1.5098381042480469, "eval_runtime": 2.6396, "eval_samples_per_second": 88.651, "eval_steps_per_second": 3.031, "step": 20 }, { "epoch": 31.15, "eval_loss": 1.4774686098098755, "eval_runtime": 2.6606, "eval_samples_per_second": 87.951, "eval_steps_per_second": 3.007, "step": 20 }, { "epoch": 33.09, "eval_loss": 1.4770317077636719, "eval_runtime": 2.6523, "eval_samples_per_second": 88.225, "eval_steps_per_second": 3.016, "step": 21 }, { "epoch": 33.09, "eval_loss": 1.4588351249694824, "eval_runtime": 2.6529, "eval_samples_per_second": 88.205, "eval_steps_per_second": 3.016, "step": 21 }, { "epoch": 35.03, "eval_loss": 1.4474384784698486, "eval_runtime": 2.6678, "eval_samples_per_second": 87.711, "eval_steps_per_second": 2.999, "step": 22 }, { "epoch": 36.12, "eval_loss": 1.424033761024475, "eval_runtime": 2.6514, "eval_samples_per_second": 88.254, "eval_steps_per_second": 3.017, "step": 23 }, { "epoch": 36.12, "eval_loss": 1.4164339303970337, "eval_runtime": 2.6554, "eval_samples_per_second": 88.121, "eval_steps_per_second": 3.013, "step": 23 }, { "epoch": 38.06, "eval_loss": 1.4059854745864868, "eval_runtime": 2.6536, "eval_samples_per_second": 88.181, "eval_steps_per_second": 3.015, "step": 24 }, { "epoch": 39.15, "learning_rate": 0.000288581929876693, "loss": 1.4776, "step": 25 }, { "epoch": 39.15, "eval_loss": 1.3752561807632446, "eval_runtime": 2.6459, "eval_samples_per_second": 88.439, "eval_steps_per_second": 3.024, "step": 25 }, { "epoch": 39.15, "eval_loss": 1.385780930519104, "eval_runtime": 2.667, "eval_samples_per_second": 87.738, "eval_steps_per_second": 3.0, "step": 25 }, { "epoch": 41.09, "eval_loss": 1.3821604251861572, "eval_runtime": 2.6548, "eval_samples_per_second": 88.141, "eval_steps_per_second": 3.013, "step": 26 }, { "epoch": 41.09, "eval_loss": 1.3268494606018066, "eval_runtime": 2.6901, "eval_samples_per_second": 86.986, "eval_steps_per_second": 2.974, "step": 26 }, { "epoch": 43.03, "eval_loss": 1.3443068265914917, "eval_runtime": 2.6512, "eval_samples_per_second": 88.263, "eval_steps_per_second": 3.018, "step": 27 }, { "epoch": 44.12, "eval_loss": 1.3258930444717407, "eval_runtime": 2.6544, "eval_samples_per_second": 88.156, "eval_steps_per_second": 3.014, "step": 28 }, { "epoch": 44.12, "eval_loss": 1.311697006225586, "eval_runtime": 2.6815, "eval_samples_per_second": 87.264, "eval_steps_per_second": 2.983, "step": 28 }, { "epoch": 46.06, "eval_loss": 1.3104833364486694, "eval_runtime": 2.6829, "eval_samples_per_second": 87.218, "eval_steps_per_second": 2.982, "step": 29 }, { "epoch": 47.15, "learning_rate": 0.00028365097862825513, "loss": 1.3585, "step": 30 }, { "epoch": 47.15, "eval_loss": 1.2553305625915527, "eval_runtime": 2.8251, "eval_samples_per_second": 82.83, "eval_steps_per_second": 2.832, "step": 30 }, { "epoch": 47.15, "eval_loss": 1.275472640991211, "eval_runtime": 2.6596, "eval_samples_per_second": 87.983, "eval_steps_per_second": 3.008, "step": 30 }, { "epoch": 49.09, "eval_loss": 1.2036432027816772, "eval_runtime": 2.6726, "eval_samples_per_second": 87.554, "eval_steps_per_second": 2.993, "step": 31 }, { "epoch": 49.09, "step": 31, "total_flos": 8700902454067200.0, "train_loss": 1.7344687215743526, "train_runtime": 3811.391, "train_samples_per_second": 27.51, "train_steps_per_second": 0.052 } ], "logging_steps": 5, "max_steps": 200, "num_train_epochs": 50, "save_steps": 500, "total_flos": 8700902454067200.0, "trial_name": null, "trial_params": null }