|
{ |
|
"best_metric": 0.5395539646127814, |
|
"best_model_checkpoint": "./runtime-text-classification/xtremedistil-l12-h384-uncased-CoLA/checkpoint-268", |
|
"epoch": 16.0, |
|
"global_step": 1072, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.6853, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.6765, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.6581, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 0.6444, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.6218, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 0.6467, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 6.363636363636364e-05, |
|
"loss": 0.594, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.272727272727273e-05, |
|
"loss": 0.6193, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 8.181818181818183e-05, |
|
"loss": 0.6009, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.090909090909092e-05, |
|
"loss": 0.5942, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001, |
|
"loss": 0.5839, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.999794293416862e-05, |
|
"loss": 0.5991, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.999177190593525e-05, |
|
"loss": 0.5808, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.998148742306837e-05, |
|
"loss": 0.5599, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.996709033180229e-05, |
|
"loss": 0.5859, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.99485818167676e-05, |
|
"loss": 0.5573, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.992596340089365e-05, |
|
"loss": 0.5645, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.989923694528327e-05, |
|
"loss": 0.513, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.986840464905958e-05, |
|
"loss": 0.5347, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.983346904918513e-05, |
|
"loss": 0.5333, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.979443302025305e-05, |
|
"loss": 0.5549, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.975129977425062e-05, |
|
"loss": 0.4822, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.5893396139144897, |
|
"eval_matthews_correlation": 0.26208039166807634, |
|
"eval_runtime": 1.0615, |
|
"eval_samples_per_second": 982.602, |
|
"eval_steps_per_second": 62.178, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.970407286029487e-05, |
|
"loss": 0.4866, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.965275616434067e-05, |
|
"loss": 0.4771, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.959735390886086e-05, |
|
"loss": 0.506, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 9.953787065249896e-05, |
|
"loss": 0.5117, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 9.947431128969387e-05, |
|
"loss": 0.4754, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 9.940668105027739e-05, |
|
"loss": 0.5221, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.933498549904368e-05, |
|
"loss": 0.4806, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 9.925923053529147e-05, |
|
"loss": 0.4426, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 9.917942239233871e-05, |
|
"loss": 0.5009, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 9.909556763700951e-05, |
|
"loss": 0.4601, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 9.900767316909396e-05, |
|
"loss": 0.4366, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.891574622078034e-05, |
|
"loss": 0.4403, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.881979435606003e-05, |
|
"loss": 0.3992, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.871982547010513e-05, |
|
"loss": 0.464, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.861584778861878e-05, |
|
"loss": 0.4349, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 9.850786986715846e-05, |
|
"loss": 0.4249, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.839590059043184e-05, |
|
"loss": 0.4344, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 9.827994917156587e-05, |
|
"loss": 0.4342, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 9.816002515134865e-05, |
|
"loss": 0.4304, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.80361383974443e-05, |
|
"loss": 0.4584, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 9.790829910358122e-05, |
|
"loss": 0.4151, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 9.777651778871309e-05, |
|
"loss": 0.4669, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.5811270475387573, |
|
"eval_matthews_correlation": 0.3721681506432673, |
|
"eval_runtime": 1.0734, |
|
"eval_samples_per_second": 971.718, |
|
"eval_steps_per_second": 61.489, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.764080529615351e-05, |
|
"loss": 0.4085, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.750117279268378e-05, |
|
"loss": 0.3755, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.735763176763394e-05, |
|
"loss": 0.3877, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.721019403193753e-05, |
|
"loss": 0.3769, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 9.705887171715966e-05, |
|
"loss": 0.3468, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 9.690367727449888e-05, |
|
"loss": 0.3736, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 9.674462347376259e-05, |
|
"loss": 0.3744, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 9.658172340231635e-05, |
|
"loss": 0.3203, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 9.6414990464007e-05, |
|
"loss": 0.3398, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.624443837805972e-05, |
|
"loss": 0.3255, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.607008117794928e-05, |
|
"loss": 0.349, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.589193321024524e-05, |
|
"loss": 0.4708, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 9.571000913343148e-05, |
|
"loss": 0.3565, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.552432391670009e-05, |
|
"loss": 0.3502, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 9.533489283871966e-05, |
|
"loss": 0.3605, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 9.51417314863781e-05, |
|
"loss": 0.4047, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.494485575350014e-05, |
|
"loss": 0.3681, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 9.474428183953951e-05, |
|
"loss": 0.4035, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 9.454002624824598e-05, |
|
"loss": 0.3704, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 9.433210578630749e-05, |
|
"loss": 0.3648, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.412053756196715e-05, |
|
"loss": 0.3498, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 9.390533898361555e-05, |
|
"loss": 0.3759, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.368652775835843e-05, |
|
"loss": 0.3077, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.615044891834259, |
|
"eval_matthews_correlation": 0.4382589402952836, |
|
"eval_runtime": 1.0332, |
|
"eval_samples_per_second": 1009.479, |
|
"eval_steps_per_second": 63.879, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 9.346412189055955e-05, |
|
"loss": 0.2746, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 9.323813968035936e-05, |
|
"loss": 0.242, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 9.300859972216924e-05, |
|
"loss": 0.2428, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 9.277552090314135e-05, |
|
"loss": 0.337, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 9.253892240161466e-05, |
|
"loss": 0.2882, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 9.229882368553692e-05, |
|
"loss": 0.2743, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 9.205524451086274e-05, |
|
"loss": 0.2693, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 9.180820491992799e-05, |
|
"loss": 0.2557, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.155772523980075e-05, |
|
"loss": 0.296, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 9.130382608060868e-05, |
|
"loss": 0.2739, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 9.104652833384317e-05, |
|
"loss": 0.2848, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 9.078585317064036e-05, |
|
"loss": 0.2613, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 9.052182204003909e-05, |
|
"loss": 0.2775, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 9.025445666721608e-05, |
|
"loss": 0.2558, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 8.998377905169822e-05, |
|
"loss": 0.3344, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 8.970981146555247e-05, |
|
"loss": 0.3003, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 8.943257645155327e-05, |
|
"loss": 0.2598, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 8.91520968213276e-05, |
|
"loss": 0.2584, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 8.886839565347797e-05, |
|
"loss": 0.2581, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 8.858149629168357e-05, |
|
"loss": 0.3605, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 8.829142234277936e-05, |
|
"loss": 0.3091, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 8.79981976748137e-05, |
|
"loss": 0.2594, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.49737870693206787, |
|
"eval_matthews_correlation": 0.5395539646127814, |
|
"eval_runtime": 1.0788, |
|
"eval_samples_per_second": 966.811, |
|
"eval_steps_per_second": 61.179, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 8.770184641508439e-05, |
|
"loss": 0.2586, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 8.740239294815345e-05, |
|
"loss": 0.2208, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 8.70998619138407e-05, |
|
"loss": 0.1708, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 8.679427820519625e-05, |
|
"loss": 0.2191, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 8.648566696645233e-05, |
|
"loss": 0.2213, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 8.617405359095437e-05, |
|
"loss": 0.2575, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 8.585946371907138e-05, |
|
"loss": 0.2006, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 8.55419232360865e-05, |
|
"loss": 0.2023, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 8.522145827006675e-05, |
|
"loss": 0.1837, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 8.489809518971348e-05, |
|
"loss": 0.2486, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 8.457186060219239e-05, |
|
"loss": 0.2044, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 8.42427813509444e-05, |
|
"loss": 0.1813, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 8.391088451347688e-05, |
|
"loss": 0.2795, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 8.357619739913557e-05, |
|
"loss": 0.1769, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 8.323874754685755e-05, |
|
"loss": 0.1875, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 8.289856272290527e-05, |
|
"loss": 0.2291, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 8.255567091858182e-05, |
|
"loss": 0.2097, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 8.22101003479278e-05, |
|
"loss": 0.2009, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 8.186187944539973e-05, |
|
"loss": 0.2932, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 8.151103686353042e-05, |
|
"loss": 0.2447, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 8.115760147057138e-05, |
|
"loss": 0.202, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 8.080160234811742e-05, |
|
"loss": 0.21, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.5594205856323242, |
|
"eval_matthews_correlation": 0.5181917740456299, |
|
"eval_runtime": 1.065, |
|
"eval_samples_per_second": 979.357, |
|
"eval_steps_per_second": 61.973, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 8.044306878871375e-05, |
|
"loss": 0.2023, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 8.00820302934458e-05, |
|
"loss": 0.1664, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 7.971851656951161e-05, |
|
"loss": 0.2118, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 7.935255752777764e-05, |
|
"loss": 0.1459, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 7.898418328031752e-05, |
|
"loss": 0.1239, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 7.861342413793433e-05, |
|
"loss": 0.2143, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 7.824031060766662e-05, |
|
"loss": 0.1456, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 7.786487339027815e-05, |
|
"loss": 0.1705, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 7.748714337773179e-05, |
|
"loss": 0.1135, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 7.710715165064765e-05, |
|
"loss": 0.2111, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 7.672492947574566e-05, |
|
"loss": 0.2572, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 7.634050830327282e-05, |
|
"loss": 0.2522, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 7.59539197644155e-05, |
|
"loss": 0.1322, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.556519566869666e-05, |
|
"loss": 0.224, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 7.517436800135853e-05, |
|
"loss": 0.1831, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 7.47814689207307e-05, |
|
"loss": 0.1731, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 7.438653075558412e-05, |
|
"loss": 0.2192, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 7.398958600247103e-05, |
|
"loss": 0.1522, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 7.359066732305095e-05, |
|
"loss": 0.2241, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 7.318980754140326e-05, |
|
"loss": 0.2172, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 7.278703964132639e-05, |
|
"loss": 0.1706, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 7.238239676362372e-05, |
|
"loss": 0.1813, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 7.197591220337679e-05, |
|
"loss": 0.1526, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.5715296268463135, |
|
"eval_matthews_correlation": 0.5149844966342378, |
|
"eval_runtime": 1.0444, |
|
"eval_samples_per_second": 998.702, |
|
"eval_steps_per_second": 63.197, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 7.156761940720555e-05, |
|
"loss": 0.1491, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 7.115755197051645e-05, |
|
"loss": 0.1685, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 7.074574363473798e-05, |
|
"loss": 0.1871, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 7.033222828454442e-05, |
|
"loss": 0.1312, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 6.991703994506761e-05, |
|
"loss": 0.1746, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 6.950021277909749e-05, |
|
"loss": 0.1484, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 6.908178108427088e-05, |
|
"loss": 0.1476, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 6.866177929024945e-05, |
|
"loss": 0.1404, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 6.824024195588677e-05, |
|
"loss": 0.1335, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 6.781720376638477e-05, |
|
"loss": 0.1782, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 6.739269953043959e-05, |
|
"loss": 0.1941, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 6.696676417737764e-05, |
|
"loss": 0.1558, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 6.653943275428135e-05, |
|
"loss": 0.1311, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 6.611074042310549e-05, |
|
"loss": 0.128, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 6.568072245778394e-05, |
|
"loss": 0.1374, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 6.524941424132719e-05, |
|
"loss": 0.1491, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 6.481685126291106e-05, |
|
"loss": 0.1242, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 6.438306911495648e-05, |
|
"loss": 0.1379, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 6.394810349020083e-05, |
|
"loss": 0.168, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 6.351199017876106e-05, |
|
"loss": 0.1601, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 6.30747650651889e-05, |
|
"loss": 0.1641, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 6.263646412551794e-05, |
|
"loss": 0.1775, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.6637021899223328, |
|
"eval_matthews_correlation": 0.5019828461798207, |
|
"eval_runtime": 1.1986, |
|
"eval_samples_per_second": 870.206, |
|
"eval_steps_per_second": 55.066, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 6.219712342430371e-05, |
|
"loss": 0.1263, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 6.175677911165599e-05, |
|
"loss": 0.114, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 6.131546742026438e-05, |
|
"loss": 0.0823, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 6.0873224662416896e-05, |
|
"loss": 0.1068, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 6.04300872270122e-05, |
|
"loss": 0.1067, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 5.998609157656539e-05, |
|
"loss": 0.1026, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 5.954127424420773e-05, |
|
"loss": 0.1208, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 5.9095671830680656e-05, |
|
"loss": 0.0778, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 5.864932100132411e-05, |
|
"loss": 0.1066, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 5.82022584830597e-05, |
|
"loss": 0.1086, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 5.7754521061368684e-05, |
|
"loss": 0.1833, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 5.730614557726509e-05, |
|
"loss": 0.1193, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 5.685716892426445e-05, |
|
"loss": 0.1628, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 5.640762804534806e-05, |
|
"loss": 0.1774, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 5.595755992992317e-05, |
|
"loss": 0.1434, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 5.550700161077945e-05, |
|
"loss": 0.0957, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 5.505599016104187e-05, |
|
"loss": 0.1219, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 5.460456269112013e-05, |
|
"loss": 0.1562, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 5.415275634565517e-05, |
|
"loss": 0.136, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 5.370060830046282e-05, |
|
"loss": 0.0868, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 5.3248155759474846e-05, |
|
"loss": 0.1028, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 5.2795435951677785e-05, |
|
"loss": 0.1681, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.6957959532737732, |
|
"eval_matthews_correlation": 0.5131045571647604, |
|
"eval_runtime": 1.0682, |
|
"eval_samples_per_second": 976.38, |
|
"eval_steps_per_second": 61.784, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 5.234248612804952e-05, |
|
"loss": 0.1163, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 5.1889343558494266e-05, |
|
"loss": 0.1022, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 5.14360455287759e-05, |
|
"loss": 0.0854, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 5.098262933744994e-05, |
|
"loss": 0.112, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 5.052913229279459e-05, |
|
"loss": 0.1067, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 5.007559170974084e-05, |
|
"loss": 0.083, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 4.962204490680216e-05, |
|
"loss": 0.085, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 4.9168529203003814e-05, |
|
"loss": 0.1075, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 4.871508191481211e-05, |
|
"loss": 0.1416, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 4.826174035306398e-05, |
|
"loss": 0.1248, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 4.7808541819896885e-05, |
|
"loss": 0.0748, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 4.735552360567952e-05, |
|
"loss": 0.0707, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 4.6902722985943444e-05, |
|
"loss": 0.0778, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.645017721831602e-05, |
|
"loss": 0.1447, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 4.599792353945466e-05, |
|
"loss": 0.0839, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 4.5545999161982953e-05, |
|
"loss": 0.1268, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 4.509444127142871e-05, |
|
"loss": 0.0705, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 4.464328702316427e-05, |
|
"loss": 0.1196, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 4.419257353934915e-05, |
|
"loss": 0.1071, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"learning_rate": 4.374233790587565e-05, |
|
"loss": 0.1177, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 4.329261716931727e-05, |
|
"loss": 0.104, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 4.284344833388047e-05, |
|
"loss": 0.0596, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.2394868358359774e-05, |
|
"loss": 0.124, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.7057417631149292, |
|
"eval_matthews_correlation": 0.5153742778418894, |
|
"eval_runtime": 1.2043, |
|
"eval_samples_per_second": 866.095, |
|
"eval_steps_per_second": 54.806, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 4.1946914153096795e-05, |
|
"loss": 0.0838, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 4.149962257694315e-05, |
|
"loss": 0.0915, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 4.105303043422753e-05, |
|
"loss": 0.0818, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 4.060717447172743e-05, |
|
"loss": 0.0508, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 4.0162091375645493e-05, |
|
"loss": 0.1156, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 3.971781776859093e-05, |
|
"loss": 0.0697, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 3.9274390206565956e-05, |
|
"loss": 0.0563, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 3.883184517595807e-05, |
|
"loss": 0.0993, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 3.83902190905377e-05, |
|
"loss": 0.0621, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 3.794954828846208e-05, |
|
"loss": 0.0788, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 3.7509869029285215e-05, |
|
"loss": 0.1224, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 3.707121749097431e-05, |
|
"loss": 0.0759, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 3.663362976693304e-05, |
|
"loss": 0.0925, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 3.619714186303162e-05, |
|
"loss": 0.0908, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 3.576178969464414e-05, |
|
"loss": 0.12, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 3.532760908369344e-05, |
|
"loss": 0.0629, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 3.489463575570349e-05, |
|
"loss": 0.0506, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 3.446290533685984e-05, |
|
"loss": 0.0738, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 3.403245335107822e-05, |
|
"loss": 0.0586, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 3.360331521708149e-05, |
|
"loss": 0.106, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 3.3175526245485366e-05, |
|
"loss": 0.111, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 3.274912163589291e-05, |
|
"loss": 0.1111, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.8173357844352722, |
|
"eval_matthews_correlation": 0.5074384885743003, |
|
"eval_runtime": 1.0523, |
|
"eval_samples_per_second": 991.19, |
|
"eval_steps_per_second": 62.722, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 3.2324136473998204e-05, |
|
"loss": 0.0763, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"learning_rate": 3.190060572869948e-05, |
|
"loss": 0.07, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 3.147856424922174e-05, |
|
"loss": 0.051, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 3.1058046762249224e-05, |
|
"loss": 0.0454, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 3.063908786906812e-05, |
|
"loss": 0.0952, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 3.022172204271938e-05, |
|
"loss": 0.0582, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 2.9805983625162227e-05, |
|
"loss": 0.0653, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 2.93919068244484e-05, |
|
"loss": 0.0617, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 2.897952571190743e-05, |
|
"loss": 0.0648, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 2.8568874219343155e-05, |
|
"loss": 0.0503, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 2.8159986136241732e-05, |
|
"loss": 0.1065, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 2.7752895106991384e-05, |
|
"loss": 0.1168, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 2.7347634628113916e-05, |
|
"loss": 0.0471, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 2.6944238045508708e-05, |
|
"loss": 0.0782, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 2.6542738551708828e-05, |
|
"loss": 0.0773, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 2.6143169183149874e-05, |
|
"loss": 0.0629, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 2.5745562817451686e-05, |
|
"loss": 0.0662, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 2.5349952170712977e-05, |
|
"loss": 0.0882, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 2.4956369794819535e-05, |
|
"loss": 0.0508, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 2.4564848074765674e-05, |
|
"loss": 0.0855, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 2.417541922598945e-05, |
|
"loss": 0.0617, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 2.378811529172203e-05, |
|
"loss": 0.1332, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.8252650499343872, |
|
"eval_matthews_correlation": 0.5260499940984096, |
|
"eval_runtime": 1.0581, |
|
"eval_samples_per_second": 985.733, |
|
"eval_steps_per_second": 62.376, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 2.340296814035101e-05, |
|
"loss": 0.0843, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 2.3020009462798163e-05, |
|
"loss": 0.0988, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 2.263927076991193e-05, |
|
"loss": 0.0605, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 2.226078338987453e-05, |
|
"loss": 0.0337, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 2.1884578465624257e-05, |
|
"loss": 0.0412, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 2.1510686952292934e-05, |
|
"loss": 0.0611, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 2.1139139614658798e-05, |
|
"loss": 0.0587, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 2.0769967024615183e-05, |
|
"loss": 0.0204, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 2.0403199558654945e-05, |
|
"loss": 0.1346, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"learning_rate": 2.0038867395370936e-05, |
|
"loss": 0.0577, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 1.967700051297295e-05, |
|
"loss": 0.0754, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 11.51, |
|
"learning_rate": 1.931762868682098e-05, |
|
"loss": 0.0537, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 1.8960781486975143e-05, |
|
"loss": 0.0334, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 1.860648827576278e-05, |
|
"loss": 0.0651, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 1.8254778205362206e-05, |
|
"loss": 0.0742, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"learning_rate": 1.7905680215404174e-05, |
|
"loss": 0.1158, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 1.7559223030590577e-05, |
|
"loss": 0.0916, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 1.7215435158330855e-05, |
|
"loss": 0.0286, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 1.6874344886396438e-05, |
|
"loss": 0.0687, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 1.65359802805931e-05, |
|
"loss": 0.0705, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 1.6200369182451564e-05, |
|
"loss": 0.0717, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 1.586753920693676e-05, |
|
"loss": 0.0445, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 1.553751774017551e-05, |
|
"loss": 0.0673, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.8085535764694214, |
|
"eval_matthews_correlation": 0.5179780196184617, |
|
"eval_runtime": 1.0516, |
|
"eval_samples_per_second": 991.801, |
|
"eval_steps_per_second": 62.76, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"learning_rate": 1.5210331937203088e-05, |
|
"loss": 0.094, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 1.4886008719728989e-05, |
|
"loss": 0.0523, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 1.4564574773921514e-05, |
|
"loss": 0.0667, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 1.4246056548212172e-05, |
|
"loss": 0.0533, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 1.3930480251119321e-05, |
|
"loss": 0.0358, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 1.3617871849091657e-05, |
|
"loss": 0.0532, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 1.3308257064371677e-05, |
|
"loss": 0.0208, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 12.36, |
|
"learning_rate": 1.3001661372879192e-05, |
|
"loss": 0.0506, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 1.2698110002115004e-05, |
|
"loss": 0.0827, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 1.2397627929085248e-05, |
|
"loss": 0.0764, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 1.2100239878246133e-05, |
|
"loss": 0.0863, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 1.1805970319469589e-05, |
|
"loss": 0.0572, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"learning_rate": 1.1514843466029807e-05, |
|
"loss": 0.103, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 1.1226883272610877e-05, |
|
"loss": 0.0234, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 1.0942113433335788e-05, |
|
"loss": 0.0521, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 1.0660557379816816e-05, |
|
"loss": 0.0459, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 1.0382238279227419e-05, |
|
"loss": 0.0695, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 1.0107179032396086e-05, |
|
"loss": 0.076, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 9.835402271921974e-06, |
|
"loss": 0.0414, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 9.56693036031256e-06, |
|
"loss": 0.0643, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 9.301785388143697e-06, |
|
"loss": 0.0413, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 9.039989172241886e-06, |
|
"loss": 0.0512, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.8409435749053955, |
|
"eval_matthews_correlation": 0.5127766293248668, |
|
"eval_runtime": 1.0686, |
|
"eval_samples_per_second": 976.072, |
|
"eval_steps_per_second": 61.765, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 8.781563253889164e-06, |
|
"loss": 0.0642, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 13.07, |
|
"learning_rate": 8.52652889705059e-06, |
|
"loss": 0.0699, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 8.274907086624594e-06, |
|
"loss": 0.0431, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 8.026718526716342e-06, |
|
"loss": 0.035, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 7.781983638934092e-06, |
|
"loss": 0.0535, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"learning_rate": 7.5407225607088405e-06, |
|
"loss": 0.0553, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 13.3, |
|
"learning_rate": 7.302955143637419e-06, |
|
"loss": 0.0515, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 7.068700951849011e-06, |
|
"loss": 0.047, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 13.39, |
|
"learning_rate": 6.837979260395349e-06, |
|
"loss": 0.0787, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 6.610809053664768e-06, |
|
"loss": 0.0559, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 6.387209023820073e-06, |
|
"loss": 0.0669, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 6.1671975692605185e-06, |
|
"loss": 0.0828, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 5.950792793107934e-06, |
|
"loss": 0.0373, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 5.738012501717144e-06, |
|
"loss": 0.0586, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 5.528874203210827e-06, |
|
"loss": 0.034, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 5.323395106038909e-06, |
|
"loss": 0.0595, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 5.121592117562573e-06, |
|
"loss": 0.0788, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 4.923481842663114e-06, |
|
"loss": 0.0449, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 4.729080582375633e-06, |
|
"loss": 0.0459, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 13.88, |
|
"learning_rate": 4.538404332547719e-06, |
|
"loss": 0.0721, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 4.351468782523316e-06, |
|
"loss": 0.1029, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 4.168289313851731e-06, |
|
"loss": 0.0457, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.8759517073631287, |
|
"eval_matthews_correlation": 0.4946640161033145, |
|
"eval_runtime": 1.079, |
|
"eval_samples_per_second": 966.594, |
|
"eval_steps_per_second": 61.165, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 3.9888809990219985e-06, |
|
"loss": 0.0621, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"learning_rate": 3.8132586002227076e-06, |
|
"loss": 0.0333, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 3.641436568127271e-06, |
|
"loss": 0.0461, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 3.4734290407049397e-06, |
|
"loss": 0.0532, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 3.309249842057499e-06, |
|
"loss": 0.044, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 3.148912481281713e-06, |
|
"loss": 0.0556, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 2.99243015135785e-06, |
|
"loss": 0.0417, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 2.839815728064077e-06, |
|
"loss": 0.0593, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 2.6910817689169922e-06, |
|
"loss": 0.0653, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 2.5462405121384423e-06, |
|
"loss": 0.0558, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 2.405303875648418e-06, |
|
"loss": 0.077, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 2.268283456084491e-06, |
|
"loss": 0.0763, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 2.135190527847608e-06, |
|
"loss": 0.075, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 2.0060360421743584e-06, |
|
"loss": 0.0172, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 14.64, |
|
"learning_rate": 1.8808306262359243e-06, |
|
"loss": 0.034, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 1.7595845822636293e-06, |
|
"loss": 0.0643, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"learning_rate": 1.6423078867012386e-06, |
|
"loss": 0.0568, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 1.529010189384078e-06, |
|
"loss": 0.0609, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 14.82, |
|
"learning_rate": 1.419700812745045e-06, |
|
"loss": 0.0454, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 14.87, |
|
"learning_rate": 1.3143887510474629e-06, |
|
"loss": 0.0688, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 1.2130826696450992e-06, |
|
"loss": 0.0528, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 14.96, |
|
"learning_rate": 1.1157909042690928e-06, |
|
"loss": 0.0491, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.0225214603420851e-06, |
|
"loss": 0.04, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.8521906137466431, |
|
"eval_matthews_correlation": 0.5103199460790546, |
|
"eval_runtime": 1.1012, |
|
"eval_samples_per_second": 947.135, |
|
"eval_steps_per_second": 59.934, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 9.332820123195418e-07, |
|
"loss": 0.0588, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 8.480799030582187e-07, |
|
"loss": 0.0311, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 7.669221432120288e-07, |
|
"loss": 0.0655, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 15.18, |
|
"learning_rate": 6.898154106551702e-07, |
|
"loss": 0.0682, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 6.167660499326322e-07, |
|
"loss": 0.0736, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 5.477800717381687e-07, |
|
"loss": 0.0778, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 4.828631524197325e-07, |
|
"loss": 0.0917, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 4.2202063351238774e-07, |
|
"loss": 0.0468, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 3.652575212987952e-07, |
|
"loss": 0.0243, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 3.1257848639730803e-07, |
|
"loss": 0.0767, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 2.6398786337762936e-07, |
|
"loss": 0.0509, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 2.1948965040417547e-07, |
|
"loss": 0.0546, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 1.790875089070887e-07, |
|
"loss": 0.0451, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 1.427847632809509e-07, |
|
"loss": 0.0781, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 15.67, |
|
"learning_rate": 1.1058440061127439e-07, |
|
"loss": 0.0233, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 8.248907042868737e-08, |
|
"loss": 0.0393, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 5.850108449094727e-08, |
|
"loss": 0.0612, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 3.862241659269294e-08, |
|
"loss": 0.0476, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 15.85, |
|
"learning_rate": 2.2854702403068972e-08, |
|
"loss": 0.0283, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 1.1199239331122214e-08, |
|
"loss": 0.0482, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 3.6569864190427738e-09, |
|
"loss": 0.052, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 2.2856426326045565e-10, |
|
"loss": 0.0485, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.8556408882141113, |
|
"eval_matthews_correlation": 0.5076423377649488, |
|
"eval_runtime": 1.1842, |
|
"eval_samples_per_second": 880.772, |
|
"eval_steps_per_second": 55.734, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 1072, |
|
"total_flos": 2253110839083008.0, |
|
"train_loss": 0.1824943411264763, |
|
"train_runtime": 434.1785, |
|
"train_samples_per_second": 315.115, |
|
"train_steps_per_second": 2.469 |
|
} |
|
], |
|
"max_steps": 1072, |
|
"num_train_epochs": 16, |
|
"total_flos": 2253110839083008.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|