File size: 4,708 Bytes
5497460 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 |
{
"best_metric": 0.00286100001416597,
"best_model_checkpoint": "cola-pixel-handwritten-mean-vatrpp-256-64-4-2e-5-15000-42/checkpoint-100",
"epoch": 33.32835820895522,
"global_step": 1100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 3.03,
"learning_rate": 9.800000000000001e-06,
"loss": 0.655,
"step": 100
},
{
"epoch": 3.03,
"eval_loss": 0.6341390013694763,
"eval_matthews_correlation": 0.00286100001416597,
"eval_runtime": 7.8213,
"eval_samples_per_second": 133.354,
"eval_steps_per_second": 16.749,
"step": 100
},
{
"epoch": 6.06,
"learning_rate": 1.98e-05,
"loss": 0.6174,
"step": 200
},
{
"epoch": 6.06,
"eval_loss": 0.6281591057777405,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.9503,
"eval_samples_per_second": 131.19,
"eval_steps_per_second": 16.477,
"step": 200
},
{
"epoch": 9.09,
"learning_rate": 1.9867567567567568e-05,
"loss": 0.6196,
"step": 300
},
{
"epoch": 9.09,
"eval_loss": 0.6198328137397766,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8212,
"eval_samples_per_second": 133.356,
"eval_steps_per_second": 16.749,
"step": 300
},
{
"epoch": 12.12,
"learning_rate": 1.9733783783783785e-05,
"loss": 0.6158,
"step": 400
},
{
"epoch": 12.12,
"eval_loss": 0.6199322938919067,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8954,
"eval_samples_per_second": 132.102,
"eval_steps_per_second": 16.592,
"step": 400
},
{
"epoch": 15.15,
"learning_rate": 1.9600000000000002e-05,
"loss": 0.6175,
"step": 500
},
{
"epoch": 15.15,
"eval_loss": 0.6180645823478699,
"eval_matthews_correlation": 0.0,
"eval_runtime": 8.2397,
"eval_samples_per_second": 126.583,
"eval_steps_per_second": 15.899,
"step": 500
},
{
"epoch": 18.18,
"learning_rate": 1.9464864864864867e-05,
"loss": 0.6152,
"step": 600
},
{
"epoch": 18.18,
"eval_loss": 0.619079053401947,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8304,
"eval_samples_per_second": 133.198,
"eval_steps_per_second": 16.73,
"step": 600
},
{
"epoch": 21.21,
"learning_rate": 1.932972972972973e-05,
"loss": 0.617,
"step": 700
},
{
"epoch": 21.21,
"eval_loss": 0.6184842586517334,
"eval_matthews_correlation": 0.0,
"eval_runtime": 8.2213,
"eval_samples_per_second": 126.865,
"eval_steps_per_second": 15.934,
"step": 700
},
{
"epoch": 24.24,
"learning_rate": 1.9194594594594596e-05,
"loss": 0.6191,
"step": 800
},
{
"epoch": 24.24,
"eval_loss": 0.6185177564620972,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.887,
"eval_samples_per_second": 132.243,
"eval_steps_per_second": 16.61,
"step": 800
},
{
"epoch": 27.27,
"learning_rate": 1.905945945945946e-05,
"loss": 0.6162,
"step": 900
},
{
"epoch": 27.27,
"eval_loss": 0.6183082461357117,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8268,
"eval_samples_per_second": 133.26,
"eval_steps_per_second": 16.737,
"step": 900
},
{
"epoch": 30.3,
"learning_rate": 1.8924324324324325e-05,
"loss": 0.6166,
"step": 1000
},
{
"epoch": 30.3,
"eval_loss": 0.6183302402496338,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.795,
"eval_samples_per_second": 133.803,
"eval_steps_per_second": 16.806,
"step": 1000
},
{
"epoch": 33.33,
"learning_rate": 1.878918918918919e-05,
"loss": 0.6177,
"step": 1100
},
{
"epoch": 33.33,
"eval_loss": 0.6182125210762024,
"eval_matthews_correlation": 0.0,
"eval_runtime": 7.8695,
"eval_samples_per_second": 132.537,
"eval_steps_per_second": 16.647,
"step": 1100
},
{
"epoch": 33.33,
"step": 1100,
"total_flos": 7.341930418964005e+18,
"train_loss": 0.6206500174782493,
"train_runtime": 3140.0413,
"train_samples_per_second": 1222.914,
"train_steps_per_second": 4.777
}
],
"max_steps": 15000,
"num_train_epochs": 455,
"total_flos": 7.341930418964005e+18,
"trial_name": null,
"trial_params": null
}
|