{ "best_metric": 0.5070627331733704, "best_model_checkpoint": "bert_uncased_L-4_H-256_A-4_mrpc/checkpoint-105", "epoch": 12.0, "eval_steps": 500, "global_step": 180, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.9642578363418579, "learning_rate": 4.9e-05, "loss": 0.6375, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.6936274509803921, "eval_combined_score": 0.7553056727815577, "eval_f1": 0.8169838945827232, "eval_loss": 0.6024385690689087, "eval_runtime": 0.156, "eval_samples_per_second": 2615.794, "eval_steps_per_second": 12.823, "step": 15 }, { "epoch": 2.0, "grad_norm": 1.6478321552276611, "learning_rate": 4.8e-05, "loss": 0.594, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.6985294117647058, "eval_combined_score": 0.7576104584904007, "eval_f1": 0.8166915052160953, "eval_loss": 0.5776079297065735, "eval_runtime": 0.1598, "eval_samples_per_second": 2553.282, "eval_steps_per_second": 12.516, "step": 30 }, { "epoch": 3.0, "grad_norm": 2.1746251583099365, "learning_rate": 4.7e-05, "loss": 0.5504, "step": 45 }, { "epoch": 3.0, "eval_accuracy": 0.7279411764705882, "eval_combined_score": 0.7776564358247187, "eval_f1": 0.827371695178849, "eval_loss": 0.5475428104400635, "eval_runtime": 0.1486, "eval_samples_per_second": 2745.65, "eval_steps_per_second": 13.459, "step": 45 }, { "epoch": 4.0, "grad_norm": 2.050297737121582, "learning_rate": 4.600000000000001e-05, "loss": 0.5155, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.7598039215686274, "eval_combined_score": 0.7971316905140435, "eval_f1": 0.8344594594594595, "eval_loss": 0.5082876086235046, "eval_runtime": 0.1558, "eval_samples_per_second": 2618.432, "eval_steps_per_second": 12.835, "step": 60 }, { "epoch": 5.0, "grad_norm": 4.48883056640625, "learning_rate": 4.5e-05, "loss": 0.4668, "step": 75 }, { "epoch": 5.0, "eval_accuracy": 0.7598039215686274, "eval_combined_score": 0.7971316905140435, "eval_f1": 0.8344594594594595, "eval_loss": 0.5116193890571594, "eval_runtime": 0.1902, "eval_samples_per_second": 2144.58, "eval_steps_per_second": 10.513, "step": 75 }, { "epoch": 6.0, "grad_norm": 2.939922332763672, "learning_rate": 4.4000000000000006e-05, "loss": 0.4292, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.7696078431372549, "eval_combined_score": 0.8064705882352942, "eval_f1": 0.8433333333333334, "eval_loss": 0.5237414836883545, "eval_runtime": 0.1477, "eval_samples_per_second": 2761.45, "eval_steps_per_second": 13.537, "step": 90 }, { "epoch": 7.0, "grad_norm": 3.8994574546813965, "learning_rate": 4.3e-05, "loss": 0.3859, "step": 105 }, { "epoch": 7.0, "eval_accuracy": 0.7720588235294118, "eval_combined_score": 0.8057185309356903, "eval_f1": 0.8393782383419689, "eval_loss": 0.5070627331733704, "eval_runtime": 0.1535, "eval_samples_per_second": 2658.103, "eval_steps_per_second": 13.03, "step": 105 }, { "epoch": 8.0, "grad_norm": 4.1840901374816895, "learning_rate": 4.2e-05, "loss": 0.3455, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.7720588235294118, "eval_combined_score": 0.8073492087190206, "eval_f1": 0.8426395939086294, "eval_loss": 0.5299550890922546, "eval_runtime": 0.1493, "eval_samples_per_second": 2733.148, "eval_steps_per_second": 13.398, "step": 120 }, { "epoch": 9.0, "grad_norm": 4.495199203491211, "learning_rate": 4.1e-05, "loss": 0.3049, "step": 135 }, { "epoch": 9.0, "eval_accuracy": 0.7720588235294118, "eval_combined_score": 0.8065422322775264, "eval_f1": 0.841025641025641, "eval_loss": 0.5408151745796204, "eval_runtime": 0.1508, "eval_samples_per_second": 2706.105, "eval_steps_per_second": 13.265, "step": 135 }, { "epoch": 10.0, "grad_norm": 3.322133779525757, "learning_rate": 4e-05, "loss": 0.2735, "step": 150 }, { "epoch": 10.0, "eval_accuracy": 0.7745098039215687, "eval_combined_score": 0.8084877786731131, "eval_f1": 0.8424657534246576, "eval_loss": 0.5336543321609497, "eval_runtime": 0.1546, "eval_samples_per_second": 2638.334, "eval_steps_per_second": 12.933, "step": 150 }, { "epoch": 11.0, "grad_norm": 8.718647956848145, "learning_rate": 3.9000000000000006e-05, "loss": 0.2454, "step": 165 }, { "epoch": 11.0, "eval_accuracy": 0.7647058823529411, "eval_combined_score": 0.8023529411764705, "eval_f1": 0.84, "eval_loss": 0.5962408185005188, "eval_runtime": 0.1487, "eval_samples_per_second": 2744.506, "eval_steps_per_second": 13.453, "step": 165 }, { "epoch": 12.0, "grad_norm": 5.987424373626709, "learning_rate": 3.8e-05, "loss": 0.2117, "step": 180 }, { "epoch": 12.0, "eval_accuracy": 0.7794117647058824, "eval_combined_score": 0.8131752701080432, "eval_f1": 0.846938775510204, "eval_loss": 0.5755988955497742, "eval_runtime": 0.1478, "eval_samples_per_second": 2761.343, "eval_steps_per_second": 13.536, "step": 180 }, { "epoch": 12.0, "step": 180, "total_flos": 218095665758208.0, "train_loss": 0.4133431394894918, "train_runtime": 25.962, "train_samples_per_second": 7064.168, "train_steps_per_second": 28.888 } ], "logging_steps": 1, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 218095665758208.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }