{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.714285714285714, "eval_steps": 1, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5714285714285714, "grad_norm": 368.6911926269531, "learning_rate": 2.5e-05, "loss": 6.5434, "step": 1 }, { "epoch": 0.5714285714285714, "eval_loss": 6.665082931518555, "eval_matthews_correlation": 0.05644705286285884, "eval_runtime": 12.3625, "eval_samples_per_second": 17.877, "eval_steps_per_second": 0.566, "step": 1 }, { "epoch": 1.1428571428571428, "grad_norm": 350.3536682128906, "learning_rate": 5e-05, "loss": 6.5122, "step": 2 }, { "epoch": 1.1428571428571428, "eval_loss": 2.1827754974365234, "eval_matthews_correlation": -0.03063458145320982, "eval_runtime": 12.3544, "eval_samples_per_second": 17.888, "eval_steps_per_second": 0.567, "step": 2 }, { "epoch": 1.7142857142857144, "grad_norm": 154.08387756347656, "learning_rate": 4.375e-05, "loss": 2.5541, "step": 3 }, { "epoch": 1.7142857142857144, "eval_loss": 2.498410940170288, "eval_matthews_correlation": 0.04407882683211864, "eval_runtime": 12.3388, "eval_samples_per_second": 17.911, "eval_steps_per_second": 0.567, "step": 3 }, { "epoch": 2.2857142857142856, "grad_norm": 207.70213317871094, "learning_rate": 3.7500000000000003e-05, "loss": 2.6873, "step": 4 }, { "epoch": 2.2857142857142856, "eval_loss": 1.4699368476867676, "eval_matthews_correlation": 0.3031673704738923, "eval_runtime": 12.3295, "eval_samples_per_second": 17.924, "eval_steps_per_second": 0.568, "step": 4 }, { "epoch": 2.857142857142857, "grad_norm": 37.452369689941406, "learning_rate": 3.125e-05, "loss": 1.2391, "step": 5 }, { "epoch": 2.857142857142857, "eval_loss": 1.352213978767395, "eval_matthews_correlation": 0.314413679438495, "eval_runtime": 12.3574, "eval_samples_per_second": 17.884, "eval_steps_per_second": 0.566, "step": 5 }, { "epoch": 3.4285714285714284, "grad_norm": 29.68463134765625, "learning_rate": 2.5e-05, "loss": 0.9346, "step": 6 }, { "epoch": 3.4285714285714284, "eval_loss": 1.4228562116622925, "eval_matthews_correlation": 0.2853316530558257, "eval_runtime": 12.347, "eval_samples_per_second": 17.899, "eval_steps_per_second": 0.567, "step": 6 }, { "epoch": 4.0, "grad_norm": 39.614559173583984, "learning_rate": 1.8750000000000002e-05, "loss": 0.7116, "step": 7 }, { "epoch": 4.0, "eval_loss": 1.3237327337265015, "eval_matthews_correlation": 0.35815939793257473, "eval_runtime": 12.3427, "eval_samples_per_second": 17.905, "eval_steps_per_second": 0.567, "step": 7 }, { "epoch": 4.571428571428571, "grad_norm": 20.50189208984375, "learning_rate": 1.25e-05, "loss": 0.498, "step": 8 }, { "epoch": 4.571428571428571, "eval_loss": 1.4014744758605957, "eval_matthews_correlation": 0.30895565804216457, "eval_runtime": 12.3323, "eval_samples_per_second": 17.92, "eval_steps_per_second": 0.568, "step": 8 }, { "epoch": 5.142857142857143, "grad_norm": 36.50432205200195, "learning_rate": 6.25e-06, "loss": 0.4381, "step": 9 }, { "epoch": 5.142857142857143, "eval_loss": 1.4038134813308716, "eval_matthews_correlation": 0.3482631092143663, "eval_runtime": 12.3405, "eval_samples_per_second": 17.909, "eval_steps_per_second": 0.567, "step": 9 }, { "epoch": 5.714285714285714, "grad_norm": 38.02540969848633, "learning_rate": 0.0, "loss": 0.3234, "step": 10 }, { "epoch": 5.714285714285714, "eval_loss": 1.3596094846725464, "eval_matthews_correlation": 0.32562719853157773, "eval_runtime": 12.3764, "eval_samples_per_second": 17.857, "eval_steps_per_second": 0.566, "step": 10 }, { "epoch": 5.714285714285714, "step": 10, "total_flos": 2.4165932347392e+16, "train_loss": 2.244178855419159, "train_runtime": 1356.407, "train_samples_per_second": 6.51, "train_steps_per_second": 0.007 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 2.4165932347392e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }