{ "best_metric": 0.00286100001416597, "best_model_checkpoint": "cola-pixel-handwritten-mean-vatrpp-256-64-4-2e-5-15000-42/checkpoint-100", "epoch": 33.32835820895522, "global_step": 1100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.03, "learning_rate": 9.800000000000001e-06, "loss": 0.655, "step": 100 }, { "epoch": 3.03, "eval_loss": 0.6341390013694763, "eval_matthews_correlation": 0.00286100001416597, "eval_runtime": 7.8213, "eval_samples_per_second": 133.354, "eval_steps_per_second": 16.749, "step": 100 }, { "epoch": 6.06, "learning_rate": 1.98e-05, "loss": 0.6174, "step": 200 }, { "epoch": 6.06, "eval_loss": 0.6281591057777405, "eval_matthews_correlation": 0.0, "eval_runtime": 7.9503, "eval_samples_per_second": 131.19, "eval_steps_per_second": 16.477, "step": 200 }, { "epoch": 9.09, "learning_rate": 1.9867567567567568e-05, "loss": 0.6196, "step": 300 }, { "epoch": 9.09, "eval_loss": 0.6198328137397766, "eval_matthews_correlation": 0.0, "eval_runtime": 7.8212, "eval_samples_per_second": 133.356, "eval_steps_per_second": 16.749, "step": 300 }, { "epoch": 12.12, "learning_rate": 1.9733783783783785e-05, "loss": 0.6158, "step": 400 }, { "epoch": 12.12, "eval_loss": 0.6199322938919067, "eval_matthews_correlation": 0.0, "eval_runtime": 7.8954, "eval_samples_per_second": 132.102, "eval_steps_per_second": 16.592, "step": 400 }, { "epoch": 15.15, "learning_rate": 1.9600000000000002e-05, "loss": 0.6175, "step": 500 }, { "epoch": 15.15, "eval_loss": 0.6180645823478699, "eval_matthews_correlation": 0.0, "eval_runtime": 8.2397, "eval_samples_per_second": 126.583, "eval_steps_per_second": 15.899, "step": 500 }, { "epoch": 18.18, "learning_rate": 1.9464864864864867e-05, "loss": 0.6152, "step": 600 }, { "epoch": 18.18, "eval_loss": 0.619079053401947, "eval_matthews_correlation": 0.0, "eval_runtime": 7.8304, "eval_samples_per_second": 133.198, "eval_steps_per_second": 16.73, "step": 600 }, { "epoch": 21.21, "learning_rate": 1.932972972972973e-05, "loss": 0.617, "step": 700 }, { "epoch": 21.21, "eval_loss": 0.6184842586517334, "eval_matthews_correlation": 0.0, "eval_runtime": 8.2213, "eval_samples_per_second": 126.865, "eval_steps_per_second": 15.934, "step": 700 }, { "epoch": 24.24, "learning_rate": 1.9194594594594596e-05, "loss": 0.6191, "step": 800 }, { "epoch": 24.24, "eval_loss": 0.6185177564620972, "eval_matthews_correlation": 0.0, "eval_runtime": 7.887, "eval_samples_per_second": 132.243, "eval_steps_per_second": 16.61, "step": 800 }, { "epoch": 27.27, "learning_rate": 1.905945945945946e-05, "loss": 0.6162, "step": 900 }, { "epoch": 27.27, "eval_loss": 0.6183082461357117, "eval_matthews_correlation": 0.0, "eval_runtime": 7.8268, "eval_samples_per_second": 133.26, "eval_steps_per_second": 16.737, "step": 900 }, { "epoch": 30.3, "learning_rate": 1.8924324324324325e-05, "loss": 0.6166, "step": 1000 }, { "epoch": 30.3, "eval_loss": 0.6183302402496338, "eval_matthews_correlation": 0.0, "eval_runtime": 7.795, "eval_samples_per_second": 133.803, "eval_steps_per_second": 16.806, "step": 1000 }, { "epoch": 33.33, "learning_rate": 1.878918918918919e-05, "loss": 0.6177, "step": 1100 }, { "epoch": 33.33, "eval_loss": 0.6182125210762024, "eval_matthews_correlation": 0.0, "eval_runtime": 7.8695, "eval_samples_per_second": 132.537, "eval_steps_per_second": 16.647, "step": 1100 }, { "epoch": 33.33, "step": 1100, "total_flos": 7.341930418964005e+18, "train_loss": 0.6206500174782493, "train_runtime": 3140.0413, "train_samples_per_second": 1222.914, "train_steps_per_second": 4.777 } ], "max_steps": 15000, "num_train_epochs": 455, "total_flos": 7.341930418964005e+18, "trial_name": null, "trial_params": null }