{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.304932735426009, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 3.7037037037037036e-07, "loss": 0.6848, "step": 2 }, { "epoch": 0.14, "learning_rate": 7.407407407407407e-07, "loss": 0.6805, "step": 4 }, { "epoch": 0.22, "learning_rate": 1.111111111111111e-06, "loss": 0.6842, "step": 6 }, { "epoch": 0.29, "learning_rate": 1.4814814814814815e-06, "loss": 0.6893, "step": 8 }, { "epoch": 0.36, "learning_rate": 1.8518518518518519e-06, "loss": 0.6787, "step": 10 }, { "epoch": 0.43, "learning_rate": 2.222222222222222e-06, "loss": 0.6789, "step": 12 }, { "epoch": 0.5, "learning_rate": 2.5925925925925925e-06, "loss": 0.685, "step": 14 }, { "epoch": 0.57, "learning_rate": 2.962962962962963e-06, "loss": 0.6823, "step": 16 }, { "epoch": 0.65, "learning_rate": 3.3333333333333333e-06, "loss": 0.6836, "step": 18 }, { "epoch": 0.72, "learning_rate": 3.7037037037037037e-06, "loss": 0.6756, "step": 20 }, { "epoch": 0.72, "eval_accuracy": 0.5419999957084656, "eval_average_score": 0.5289427638053894, "eval_label_positive_rate": 0.47600001096725464, "eval_loss": 0.6793817281723022, "eval_runtime": 8.697, "eval_samples_per_second": 57.491, "eval_steps_per_second": 7.244, "step": 20 }, { "epoch": 0.79, "learning_rate": 4.074074074074074e-06, "loss": 0.6845, "step": 22 }, { "epoch": 0.86, "learning_rate": 4.444444444444444e-06, "loss": 0.6837, "step": 24 }, { "epoch": 0.93, "learning_rate": 4.814814814814815e-06, "loss": 0.6743, "step": 26 }, { "epoch": 1.0, "learning_rate": 5.185185185185185e-06, "loss": 0.6718, "step": 28 }, { "epoch": 1.08, "learning_rate": 5.555555555555557e-06, "loss": 0.6777, "step": 30 }, { "epoch": 1.15, "learning_rate": 5.925925925925926e-06, "loss": 0.6702, "step": 32 }, { "epoch": 1.22, "learning_rate": 6.296296296296297e-06, "loss": 0.6743, "step": 34 }, { "epoch": 1.29, "learning_rate": 6.666666666666667e-06, "loss": 0.6722, "step": 36 }, { "epoch": 1.36, "learning_rate": 7.0370370370370375e-06, "loss": 0.6697, "step": 38 }, { "epoch": 1.43, "learning_rate": 7.4074074074074075e-06, "loss": 0.6654, "step": 40 }, { "epoch": 1.43, "eval_accuracy": 0.7080000042915344, "eval_average_score": 0.4319504201412201, "eval_label_positive_rate": 0.47600001096725464, "eval_loss": 0.6608079075813293, "eval_runtime": 8.6968, "eval_samples_per_second": 57.492, "eval_steps_per_second": 7.244, "step": 40 }, { "epoch": 1.51, "learning_rate": 7.77777777777778e-06, "loss": 0.6572, "step": 42 }, { "epoch": 1.58, "learning_rate": 8.148148148148148e-06, "loss": 0.6621, "step": 44 }, { "epoch": 1.65, "learning_rate": 8.518518518518519e-06, "loss": 0.6575, "step": 46 }, { "epoch": 1.72, "learning_rate": 8.888888888888888e-06, "loss": 0.6455, "step": 48 }, { "epoch": 1.79, "learning_rate": 9.25925925925926e-06, "loss": 0.6367, "step": 50 }, { "epoch": 1.87, "learning_rate": 9.62962962962963e-06, "loss": 0.64, "step": 52 }, { "epoch": 1.94, "learning_rate": 1e-05, "loss": 0.6316, "step": 54 }, { "epoch": 2.01, "learning_rate": 1e-05, "loss": 0.6121, "step": 56 }, { "epoch": 2.08, "learning_rate": 1e-05, "loss": 0.6104, "step": 58 }, { "epoch": 2.15, "learning_rate": 1e-05, "loss": 0.5916, "step": 60 }, { "epoch": 2.15, "eval_accuracy": 0.9419999718666077, "eval_average_score": 0.13356663286685944, "eval_label_positive_rate": 0.47600001096725464, "eval_loss": 0.5838245153427124, "eval_runtime": 8.6962, "eval_samples_per_second": 57.496, "eval_steps_per_second": 7.245, "step": 60 }, { "epoch": 2.22, "learning_rate": 1e-05, "loss": 0.5801, "step": 62 }, { "epoch": 2.3, "learning_rate": 1e-05, "loss": 0.5677, "step": 64 }, { "epoch": 2.37, "learning_rate": 1e-05, "loss": 0.5472, "step": 66 }, { "epoch": 2.44, "learning_rate": 1e-05, "loss": 0.5267, "step": 68 }, { "epoch": 2.51, "learning_rate": 1e-05, "loss": 0.5224, "step": 70 }, { "epoch": 2.58, "learning_rate": 1e-05, "loss": 0.49, "step": 72 }, { "epoch": 2.65, "learning_rate": 1e-05, "loss": 0.4607, "step": 74 }, { "epoch": 2.73, "learning_rate": 1e-05, "loss": 0.4295, "step": 76 }, { "epoch": 2.8, "learning_rate": 1e-05, "loss": 0.393, "step": 78 }, { "epoch": 2.87, "learning_rate": 1e-05, "loss": 0.3463, "step": 80 }, { "epoch": 2.87, "eval_accuracy": 0.9800000190734863, "eval_average_score": -0.13676369190216064, "eval_label_positive_rate": 0.47600001096725464, "eval_loss": 0.30906087160110474, "eval_runtime": 8.6996, "eval_samples_per_second": 57.474, "eval_steps_per_second": 7.242, "step": 80 }, { "epoch": 2.94, "learning_rate": 1e-05, "loss": 0.3167, "step": 82 }, { "epoch": 3.01, "learning_rate": 1e-05, "loss": 0.2559, "step": 84 }, { "epoch": 3.09, "learning_rate": 1e-05, "loss": 0.2119, "step": 86 }, { "epoch": 3.16, "learning_rate": 1e-05, "loss": 0.1874, "step": 88 }, { "epoch": 3.23, "learning_rate": 1e-05, "loss": 0.1407, "step": 90 }, { "epoch": 3.3, "learning_rate": 1e-05, "loss": 0.1037, "step": 92 }, { "epoch": 3.37, "learning_rate": 1e-05, "loss": 0.0717, "step": 94 }, { "epoch": 3.44, "learning_rate": 1e-05, "loss": 0.0692, "step": 96 }, { "epoch": 3.52, "learning_rate": 1e-05, "loss": 0.0708, "step": 98 }, { "epoch": 3.59, "learning_rate": 1e-05, "loss": 0.0426, "step": 100 }, { "epoch": 3.59, "eval_accuracy": 0.9879999756813049, "eval_average_score": -0.7325789332389832, "eval_label_positive_rate": 0.47600001096725464, "eval_loss": 0.03151561692357063, "eval_runtime": 8.708, "eval_samples_per_second": 57.418, "eval_steps_per_second": 7.235, "step": 100 }, { "epoch": 3.66, "learning_rate": 1e-05, "loss": 0.0315, "step": 102 }, { "epoch": 3.73, "learning_rate": 1e-05, "loss": 0.0227, "step": 104 }, { "epoch": 3.8, "learning_rate": 1e-05, "loss": 0.0272, "step": 106 }, { "epoch": 3.87, "learning_rate": 1e-05, "loss": 0.0203, "step": 108 }, { "epoch": 3.95, "learning_rate": 1e-05, "loss": 0.0151, "step": 110 }, { "epoch": 4.02, "learning_rate": 1e-05, "loss": 0.0199, "step": 112 }, { "epoch": 4.09, "learning_rate": 1e-05, "loss": 0.0171, "step": 114 }, { "epoch": 4.16, "learning_rate": 1e-05, "loss": 0.0094, "step": 116 }, { "epoch": 4.23, "learning_rate": 1e-05, "loss": 0.0166, "step": 118 }, { "epoch": 4.3, "learning_rate": 1e-05, "loss": 0.0079, "step": 120 }, { "epoch": 4.3, "eval_accuracy": 1.0, "eval_average_score": -1.2080471515655518, "eval_label_positive_rate": 0.47600001096725464, "eval_loss": 0.009560000151395798, "eval_runtime": 8.7049, "eval_samples_per_second": 57.439, "eval_steps_per_second": 7.237, "step": 120 } ], "max_steps": 270, "num_train_epochs": 10, "total_flos": 3.914291024788193e+17, "trial_name": null, "trial_params": null }