{ "best_metric": 0.5395539646127814, "best_model_checkpoint": "./runtime-text-classification/xtremedistil-l12-h384-uncased-CoLA/checkpoint-268", "epoch": 16.0, "global_step": 1072, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 9.090909090909091e-06, "loss": 0.6853, "step": 3 }, { "epoch": 0.09, "learning_rate": 1.8181818181818182e-05, "loss": 0.6765, "step": 6 }, { "epoch": 0.13, "learning_rate": 2.7272727272727273e-05, "loss": 0.6581, "step": 9 }, { "epoch": 0.18, "learning_rate": 3.6363636363636364e-05, "loss": 0.6444, "step": 12 }, { "epoch": 0.22, "learning_rate": 4.545454545454546e-05, "loss": 0.6218, "step": 15 }, { "epoch": 0.27, "learning_rate": 5.4545454545454546e-05, "loss": 0.6467, "step": 18 }, { "epoch": 0.31, "learning_rate": 6.363636363636364e-05, "loss": 0.594, "step": 21 }, { "epoch": 0.36, "learning_rate": 7.272727272727273e-05, "loss": 0.6193, "step": 24 }, { "epoch": 0.4, "learning_rate": 8.181818181818183e-05, "loss": 0.6009, "step": 27 }, { "epoch": 0.45, "learning_rate": 9.090909090909092e-05, "loss": 0.5942, "step": 30 }, { "epoch": 0.49, "learning_rate": 0.0001, "loss": 0.5839, "step": 33 }, { "epoch": 0.54, "learning_rate": 9.999794293416862e-05, "loss": 0.5991, "step": 36 }, { "epoch": 0.58, "learning_rate": 9.999177190593525e-05, "loss": 0.5808, "step": 39 }, { "epoch": 0.63, "learning_rate": 9.998148742306837e-05, "loss": 0.5599, "step": 42 }, { "epoch": 0.67, "learning_rate": 9.996709033180229e-05, "loss": 0.5859, "step": 45 }, { "epoch": 0.72, "learning_rate": 9.99485818167676e-05, "loss": 0.5573, "step": 48 }, { "epoch": 0.76, "learning_rate": 9.992596340089365e-05, "loss": 0.5645, "step": 51 }, { "epoch": 0.81, "learning_rate": 9.989923694528327e-05, "loss": 0.513, "step": 54 }, { "epoch": 0.85, "learning_rate": 9.986840464905958e-05, "loss": 0.5347, "step": 57 }, { "epoch": 0.9, "learning_rate": 9.983346904918513e-05, "loss": 0.5333, "step": 60 }, { "epoch": 0.94, "learning_rate": 9.979443302025305e-05, "loss": 0.5549, "step": 63 }, { "epoch": 0.99, "learning_rate": 9.975129977425062e-05, "loss": 0.4822, "step": 66 }, { "epoch": 1.0, "eval_loss": 0.5893396139144897, "eval_matthews_correlation": 0.26208039166807634, "eval_runtime": 1.0615, "eval_samples_per_second": 982.602, "eval_steps_per_second": 62.178, "step": 67 }, { "epoch": 1.03, "learning_rate": 9.970407286029487e-05, "loss": 0.4866, "step": 69 }, { "epoch": 1.07, "learning_rate": 9.965275616434067e-05, "loss": 0.4771, "step": 72 }, { "epoch": 1.12, "learning_rate": 9.959735390886086e-05, "loss": 0.506, "step": 75 }, { "epoch": 1.16, "learning_rate": 9.953787065249896e-05, "loss": 0.5117, "step": 78 }, { "epoch": 1.21, "learning_rate": 9.947431128969387e-05, "loss": 0.4754, "step": 81 }, { "epoch": 1.25, "learning_rate": 9.940668105027739e-05, "loss": 0.5221, "step": 84 }, { "epoch": 1.3, "learning_rate": 9.933498549904368e-05, "loss": 0.4806, "step": 87 }, { "epoch": 1.34, "learning_rate": 9.925923053529147e-05, "loss": 0.4426, "step": 90 }, { "epoch": 1.39, "learning_rate": 9.917942239233871e-05, "loss": 0.5009, "step": 93 }, { "epoch": 1.43, "learning_rate": 9.909556763700951e-05, "loss": 0.4601, "step": 96 }, { "epoch": 1.48, "learning_rate": 9.900767316909396e-05, "loss": 0.4366, "step": 99 }, { "epoch": 1.52, "learning_rate": 9.891574622078034e-05, "loss": 0.4403, "step": 102 }, { "epoch": 1.57, "learning_rate": 9.881979435606003e-05, "loss": 0.3992, "step": 105 }, { "epoch": 1.61, "learning_rate": 9.871982547010513e-05, "loss": 0.464, "step": 108 }, { "epoch": 1.66, "learning_rate": 9.861584778861878e-05, "loss": 0.4349, "step": 111 }, { "epoch": 1.7, "learning_rate": 9.850786986715846e-05, "loss": 0.4249, "step": 114 }, { "epoch": 1.75, "learning_rate": 9.839590059043184e-05, "loss": 0.4344, "step": 117 }, { "epoch": 1.79, "learning_rate": 9.827994917156587e-05, "loss": 0.4342, "step": 120 }, { "epoch": 1.84, "learning_rate": 9.816002515134865e-05, "loss": 0.4304, "step": 123 }, { "epoch": 1.88, "learning_rate": 9.80361383974443e-05, "loss": 0.4584, "step": 126 }, { "epoch": 1.93, "learning_rate": 9.790829910358122e-05, "loss": 0.4151, "step": 129 }, { "epoch": 1.97, "learning_rate": 9.777651778871309e-05, "loss": 0.4669, "step": 132 }, { "epoch": 2.0, "eval_loss": 0.5811270475387573, "eval_matthews_correlation": 0.3721681506432673, "eval_runtime": 1.0734, "eval_samples_per_second": 971.718, "eval_steps_per_second": 61.489, "step": 134 }, { "epoch": 2.01, "learning_rate": 9.764080529615351e-05, "loss": 0.4085, "step": 135 }, { "epoch": 2.06, "learning_rate": 9.750117279268378e-05, "loss": 0.3755, "step": 138 }, { "epoch": 2.1, "learning_rate": 9.735763176763394e-05, "loss": 0.3877, "step": 141 }, { "epoch": 2.15, "learning_rate": 9.721019403193753e-05, "loss": 0.3769, "step": 144 }, { "epoch": 2.19, "learning_rate": 9.705887171715966e-05, "loss": 0.3468, "step": 147 }, { "epoch": 2.24, "learning_rate": 9.690367727449888e-05, "loss": 0.3736, "step": 150 }, { "epoch": 2.28, "learning_rate": 9.674462347376259e-05, "loss": 0.3744, "step": 153 }, { "epoch": 2.33, "learning_rate": 9.658172340231635e-05, "loss": 0.3203, "step": 156 }, { "epoch": 2.37, "learning_rate": 9.6414990464007e-05, "loss": 0.3398, "step": 159 }, { "epoch": 2.42, "learning_rate": 9.624443837805972e-05, "loss": 0.3255, "step": 162 }, { "epoch": 2.46, "learning_rate": 9.607008117794928e-05, "loss": 0.349, "step": 165 }, { "epoch": 2.51, "learning_rate": 9.589193321024524e-05, "loss": 0.4708, "step": 168 }, { "epoch": 2.55, "learning_rate": 9.571000913343148e-05, "loss": 0.3565, "step": 171 }, { "epoch": 2.6, "learning_rate": 9.552432391670009e-05, "loss": 0.3502, "step": 174 }, { "epoch": 2.64, "learning_rate": 9.533489283871966e-05, "loss": 0.3605, "step": 177 }, { "epoch": 2.69, "learning_rate": 9.51417314863781e-05, "loss": 0.4047, "step": 180 }, { "epoch": 2.73, "learning_rate": 9.494485575350014e-05, "loss": 0.3681, "step": 183 }, { "epoch": 2.78, "learning_rate": 9.474428183953951e-05, "loss": 0.4035, "step": 186 }, { "epoch": 2.82, "learning_rate": 9.454002624824598e-05, "loss": 0.3704, "step": 189 }, { "epoch": 2.87, "learning_rate": 9.433210578630749e-05, "loss": 0.3648, "step": 192 }, { "epoch": 2.91, "learning_rate": 9.412053756196715e-05, "loss": 0.3498, "step": 195 }, { "epoch": 2.96, "learning_rate": 9.390533898361555e-05, "loss": 0.3759, "step": 198 }, { "epoch": 3.0, "learning_rate": 9.368652775835843e-05, "loss": 0.3077, "step": 201 }, { "epoch": 3.0, "eval_loss": 0.615044891834259, "eval_matthews_correlation": 0.4382589402952836, "eval_runtime": 1.0332, "eval_samples_per_second": 1009.479, "eval_steps_per_second": 63.879, "step": 201 }, { "epoch": 3.04, "learning_rate": 9.346412189055955e-05, "loss": 0.2746, "step": 204 }, { "epoch": 3.09, "learning_rate": 9.323813968035936e-05, "loss": 0.242, "step": 207 }, { "epoch": 3.13, "learning_rate": 9.300859972216924e-05, "loss": 0.2428, "step": 210 }, { "epoch": 3.18, "learning_rate": 9.277552090314135e-05, "loss": 0.337, "step": 213 }, { "epoch": 3.22, "learning_rate": 9.253892240161466e-05, "loss": 0.2882, "step": 216 }, { "epoch": 3.27, "learning_rate": 9.229882368553692e-05, "loss": 0.2743, "step": 219 }, { "epoch": 3.31, "learning_rate": 9.205524451086274e-05, "loss": 0.2693, "step": 222 }, { "epoch": 3.36, "learning_rate": 9.180820491992799e-05, "loss": 0.2557, "step": 225 }, { "epoch": 3.4, "learning_rate": 9.155772523980075e-05, "loss": 0.296, "step": 228 }, { "epoch": 3.45, "learning_rate": 9.130382608060868e-05, "loss": 0.2739, "step": 231 }, { "epoch": 3.49, "learning_rate": 9.104652833384317e-05, "loss": 0.2848, "step": 234 }, { "epoch": 3.54, "learning_rate": 9.078585317064036e-05, "loss": 0.2613, "step": 237 }, { "epoch": 3.58, "learning_rate": 9.052182204003909e-05, "loss": 0.2775, "step": 240 }, { "epoch": 3.63, "learning_rate": 9.025445666721608e-05, "loss": 0.2558, "step": 243 }, { "epoch": 3.67, "learning_rate": 8.998377905169822e-05, "loss": 0.3344, "step": 246 }, { "epoch": 3.72, "learning_rate": 8.970981146555247e-05, "loss": 0.3003, "step": 249 }, { "epoch": 3.76, "learning_rate": 8.943257645155327e-05, "loss": 0.2598, "step": 252 }, { "epoch": 3.81, "learning_rate": 8.91520968213276e-05, "loss": 0.2584, "step": 255 }, { "epoch": 3.85, "learning_rate": 8.886839565347797e-05, "loss": 0.2581, "step": 258 }, { "epoch": 3.9, "learning_rate": 8.858149629168357e-05, "loss": 0.3605, "step": 261 }, { "epoch": 3.94, "learning_rate": 8.829142234277936e-05, "loss": 0.3091, "step": 264 }, { "epoch": 3.99, "learning_rate": 8.79981976748137e-05, "loss": 0.2594, "step": 267 }, { "epoch": 4.0, "eval_loss": 0.49737870693206787, "eval_matthews_correlation": 0.5395539646127814, "eval_runtime": 1.0788, "eval_samples_per_second": 966.811, "eval_steps_per_second": 61.179, "step": 268 }, { "epoch": 4.03, "learning_rate": 8.770184641508439e-05, "loss": 0.2586, "step": 270 }, { "epoch": 4.07, "learning_rate": 8.740239294815345e-05, "loss": 0.2208, "step": 273 }, { "epoch": 4.12, "learning_rate": 8.70998619138407e-05, "loss": 0.1708, "step": 276 }, { "epoch": 4.16, "learning_rate": 8.679427820519625e-05, "loss": 0.2191, "step": 279 }, { "epoch": 4.21, "learning_rate": 8.648566696645233e-05, "loss": 0.2213, "step": 282 }, { "epoch": 4.25, "learning_rate": 8.617405359095437e-05, "loss": 0.2575, "step": 285 }, { "epoch": 4.3, "learning_rate": 8.585946371907138e-05, "loss": 0.2006, "step": 288 }, { "epoch": 4.34, "learning_rate": 8.55419232360865e-05, "loss": 0.2023, "step": 291 }, { "epoch": 4.39, "learning_rate": 8.522145827006675e-05, "loss": 0.1837, "step": 294 }, { "epoch": 4.43, "learning_rate": 8.489809518971348e-05, "loss": 0.2486, "step": 297 }, { "epoch": 4.48, "learning_rate": 8.457186060219239e-05, "loss": 0.2044, "step": 300 }, { "epoch": 4.52, "learning_rate": 8.42427813509444e-05, "loss": 0.1813, "step": 303 }, { "epoch": 4.57, "learning_rate": 8.391088451347688e-05, "loss": 0.2795, "step": 306 }, { "epoch": 4.61, "learning_rate": 8.357619739913557e-05, "loss": 0.1769, "step": 309 }, { "epoch": 4.66, "learning_rate": 8.323874754685755e-05, "loss": 0.1875, "step": 312 }, { "epoch": 4.7, "learning_rate": 8.289856272290527e-05, "loss": 0.2291, "step": 315 }, { "epoch": 4.75, "learning_rate": 8.255567091858182e-05, "loss": 0.2097, "step": 318 }, { "epoch": 4.79, "learning_rate": 8.22101003479278e-05, "loss": 0.2009, "step": 321 }, { "epoch": 4.84, "learning_rate": 8.186187944539973e-05, "loss": 0.2932, "step": 324 }, { "epoch": 4.88, "learning_rate": 8.151103686353042e-05, "loss": 0.2447, "step": 327 }, { "epoch": 4.93, "learning_rate": 8.115760147057138e-05, "loss": 0.202, "step": 330 }, { "epoch": 4.97, "learning_rate": 8.080160234811742e-05, "loss": 0.21, "step": 333 }, { "epoch": 5.0, "eval_loss": 0.5594205856323242, "eval_matthews_correlation": 0.5181917740456299, "eval_runtime": 1.065, "eval_samples_per_second": 979.357, "eval_steps_per_second": 61.973, "step": 335 }, { "epoch": 5.01, "learning_rate": 8.044306878871375e-05, "loss": 0.2023, "step": 336 }, { "epoch": 5.06, "learning_rate": 8.00820302934458e-05, "loss": 0.1664, "step": 339 }, { "epoch": 5.1, "learning_rate": 7.971851656951161e-05, "loss": 0.2118, "step": 342 }, { "epoch": 5.15, "learning_rate": 7.935255752777764e-05, "loss": 0.1459, "step": 345 }, { "epoch": 5.19, "learning_rate": 7.898418328031752e-05, "loss": 0.1239, "step": 348 }, { "epoch": 5.24, "learning_rate": 7.861342413793433e-05, "loss": 0.2143, "step": 351 }, { "epoch": 5.28, "learning_rate": 7.824031060766662e-05, "loss": 0.1456, "step": 354 }, { "epoch": 5.33, "learning_rate": 7.786487339027815e-05, "loss": 0.1705, "step": 357 }, { "epoch": 5.37, "learning_rate": 7.748714337773179e-05, "loss": 0.1135, "step": 360 }, { "epoch": 5.42, "learning_rate": 7.710715165064765e-05, "loss": 0.2111, "step": 363 }, { "epoch": 5.46, "learning_rate": 7.672492947574566e-05, "loss": 0.2572, "step": 366 }, { "epoch": 5.51, "learning_rate": 7.634050830327282e-05, "loss": 0.2522, "step": 369 }, { "epoch": 5.55, "learning_rate": 7.59539197644155e-05, "loss": 0.1322, "step": 372 }, { "epoch": 5.6, "learning_rate": 7.556519566869666e-05, "loss": 0.224, "step": 375 }, { "epoch": 5.64, "learning_rate": 7.517436800135853e-05, "loss": 0.1831, "step": 378 }, { "epoch": 5.69, "learning_rate": 7.47814689207307e-05, "loss": 0.1731, "step": 381 }, { "epoch": 5.73, "learning_rate": 7.438653075558412e-05, "loss": 0.2192, "step": 384 }, { "epoch": 5.78, "learning_rate": 7.398958600247103e-05, "loss": 0.1522, "step": 387 }, { "epoch": 5.82, "learning_rate": 7.359066732305095e-05, "loss": 0.2241, "step": 390 }, { "epoch": 5.87, "learning_rate": 7.318980754140326e-05, "loss": 0.2172, "step": 393 }, { "epoch": 5.91, "learning_rate": 7.278703964132639e-05, "loss": 0.1706, "step": 396 }, { "epoch": 5.96, "learning_rate": 7.238239676362372e-05, "loss": 0.1813, "step": 399 }, { "epoch": 6.0, "learning_rate": 7.197591220337679e-05, "loss": 0.1526, "step": 402 }, { "epoch": 6.0, "eval_loss": 0.5715296268463135, "eval_matthews_correlation": 0.5149844966342378, "eval_runtime": 1.0444, "eval_samples_per_second": 998.702, "eval_steps_per_second": 63.197, "step": 402 }, { "epoch": 6.04, "learning_rate": 7.156761940720555e-05, "loss": 0.1491, "step": 405 }, { "epoch": 6.09, "learning_rate": 7.115755197051645e-05, "loss": 0.1685, "step": 408 }, { "epoch": 6.13, "learning_rate": 7.074574363473798e-05, "loss": 0.1871, "step": 411 }, { "epoch": 6.18, "learning_rate": 7.033222828454442e-05, "loss": 0.1312, "step": 414 }, { "epoch": 6.22, "learning_rate": 6.991703994506761e-05, "loss": 0.1746, "step": 417 }, { "epoch": 6.27, "learning_rate": 6.950021277909749e-05, "loss": 0.1484, "step": 420 }, { "epoch": 6.31, "learning_rate": 6.908178108427088e-05, "loss": 0.1476, "step": 423 }, { "epoch": 6.36, "learning_rate": 6.866177929024945e-05, "loss": 0.1404, "step": 426 }, { "epoch": 6.4, "learning_rate": 6.824024195588677e-05, "loss": 0.1335, "step": 429 }, { "epoch": 6.45, "learning_rate": 6.781720376638477e-05, "loss": 0.1782, "step": 432 }, { "epoch": 6.49, "learning_rate": 6.739269953043959e-05, "loss": 0.1941, "step": 435 }, { "epoch": 6.54, "learning_rate": 6.696676417737764e-05, "loss": 0.1558, "step": 438 }, { "epoch": 6.58, "learning_rate": 6.653943275428135e-05, "loss": 0.1311, "step": 441 }, { "epoch": 6.63, "learning_rate": 6.611074042310549e-05, "loss": 0.128, "step": 444 }, { "epoch": 6.67, "learning_rate": 6.568072245778394e-05, "loss": 0.1374, "step": 447 }, { "epoch": 6.72, "learning_rate": 6.524941424132719e-05, "loss": 0.1491, "step": 450 }, { "epoch": 6.76, "learning_rate": 6.481685126291106e-05, "loss": 0.1242, "step": 453 }, { "epoch": 6.81, "learning_rate": 6.438306911495648e-05, "loss": 0.1379, "step": 456 }, { "epoch": 6.85, "learning_rate": 6.394810349020083e-05, "loss": 0.168, "step": 459 }, { "epoch": 6.9, "learning_rate": 6.351199017876106e-05, "loss": 0.1601, "step": 462 }, { "epoch": 6.94, "learning_rate": 6.30747650651889e-05, "loss": 0.1641, "step": 465 }, { "epoch": 6.99, "learning_rate": 6.263646412551794e-05, "loss": 0.1775, "step": 468 }, { "epoch": 7.0, "eval_loss": 0.6637021899223328, "eval_matthews_correlation": 0.5019828461798207, "eval_runtime": 1.1986, "eval_samples_per_second": 870.206, "eval_steps_per_second": 55.066, "step": 469 }, { "epoch": 7.03, "learning_rate": 6.219712342430371e-05, "loss": 0.1263, "step": 471 }, { "epoch": 7.07, "learning_rate": 6.175677911165599e-05, "loss": 0.114, "step": 474 }, { "epoch": 7.12, "learning_rate": 6.131546742026438e-05, "loss": 0.0823, "step": 477 }, { "epoch": 7.16, "learning_rate": 6.0873224662416896e-05, "loss": 0.1068, "step": 480 }, { "epoch": 7.21, "learning_rate": 6.04300872270122e-05, "loss": 0.1067, "step": 483 }, { "epoch": 7.25, "learning_rate": 5.998609157656539e-05, "loss": 0.1026, "step": 486 }, { "epoch": 7.3, "learning_rate": 5.954127424420773e-05, "loss": 0.1208, "step": 489 }, { "epoch": 7.34, "learning_rate": 5.9095671830680656e-05, "loss": 0.0778, "step": 492 }, { "epoch": 7.39, "learning_rate": 5.864932100132411e-05, "loss": 0.1066, "step": 495 }, { "epoch": 7.43, "learning_rate": 5.82022584830597e-05, "loss": 0.1086, "step": 498 }, { "epoch": 7.48, "learning_rate": 5.7754521061368684e-05, "loss": 0.1833, "step": 501 }, { "epoch": 7.52, "learning_rate": 5.730614557726509e-05, "loss": 0.1193, "step": 504 }, { "epoch": 7.57, "learning_rate": 5.685716892426445e-05, "loss": 0.1628, "step": 507 }, { "epoch": 7.61, "learning_rate": 5.640762804534806e-05, "loss": 0.1774, "step": 510 }, { "epoch": 7.66, "learning_rate": 5.595755992992317e-05, "loss": 0.1434, "step": 513 }, { "epoch": 7.7, "learning_rate": 5.550700161077945e-05, "loss": 0.0957, "step": 516 }, { "epoch": 7.75, "learning_rate": 5.505599016104187e-05, "loss": 0.1219, "step": 519 }, { "epoch": 7.79, "learning_rate": 5.460456269112013e-05, "loss": 0.1562, "step": 522 }, { "epoch": 7.84, "learning_rate": 5.415275634565517e-05, "loss": 0.136, "step": 525 }, { "epoch": 7.88, "learning_rate": 5.370060830046282e-05, "loss": 0.0868, "step": 528 }, { "epoch": 7.93, "learning_rate": 5.3248155759474846e-05, "loss": 0.1028, "step": 531 }, { "epoch": 7.97, "learning_rate": 5.2795435951677785e-05, "loss": 0.1681, "step": 534 }, { "epoch": 8.0, "eval_loss": 0.6957959532737732, "eval_matthews_correlation": 0.5131045571647604, "eval_runtime": 1.0682, "eval_samples_per_second": 976.38, "eval_steps_per_second": 61.784, "step": 536 }, { "epoch": 8.01, "learning_rate": 5.234248612804952e-05, "loss": 0.1163, "step": 537 }, { "epoch": 8.06, "learning_rate": 5.1889343558494266e-05, "loss": 0.1022, "step": 540 }, { "epoch": 8.1, "learning_rate": 5.14360455287759e-05, "loss": 0.0854, "step": 543 }, { "epoch": 8.15, "learning_rate": 5.098262933744994e-05, "loss": 0.112, "step": 546 }, { "epoch": 8.19, "learning_rate": 5.052913229279459e-05, "loss": 0.1067, "step": 549 }, { "epoch": 8.24, "learning_rate": 5.007559170974084e-05, "loss": 0.083, "step": 552 }, { "epoch": 8.28, "learning_rate": 4.962204490680216e-05, "loss": 0.085, "step": 555 }, { "epoch": 8.33, "learning_rate": 4.9168529203003814e-05, "loss": 0.1075, "step": 558 }, { "epoch": 8.37, "learning_rate": 4.871508191481211e-05, "loss": 0.1416, "step": 561 }, { "epoch": 8.42, "learning_rate": 4.826174035306398e-05, "loss": 0.1248, "step": 564 }, { "epoch": 8.46, "learning_rate": 4.7808541819896885e-05, "loss": 0.0748, "step": 567 }, { "epoch": 8.51, "learning_rate": 4.735552360567952e-05, "loss": 0.0707, "step": 570 }, { "epoch": 8.55, "learning_rate": 4.6902722985943444e-05, "loss": 0.0778, "step": 573 }, { "epoch": 8.6, "learning_rate": 4.645017721831602e-05, "loss": 0.1447, "step": 576 }, { "epoch": 8.64, "learning_rate": 4.599792353945466e-05, "loss": 0.0839, "step": 579 }, { "epoch": 8.69, "learning_rate": 4.5545999161982953e-05, "loss": 0.1268, "step": 582 }, { "epoch": 8.73, "learning_rate": 4.509444127142871e-05, "loss": 0.0705, "step": 585 }, { "epoch": 8.78, "learning_rate": 4.464328702316427e-05, "loss": 0.1196, "step": 588 }, { "epoch": 8.82, "learning_rate": 4.419257353934915e-05, "loss": 0.1071, "step": 591 }, { "epoch": 8.87, "learning_rate": 4.374233790587565e-05, "loss": 0.1177, "step": 594 }, { "epoch": 8.91, "learning_rate": 4.329261716931727e-05, "loss": 0.104, "step": 597 }, { "epoch": 8.96, "learning_rate": 4.284344833388047e-05, "loss": 0.0596, "step": 600 }, { "epoch": 9.0, "learning_rate": 4.2394868358359774e-05, "loss": 0.124, "step": 603 }, { "epoch": 9.0, "eval_loss": 0.7057417631149292, "eval_matthews_correlation": 0.5153742778418894, "eval_runtime": 1.2043, "eval_samples_per_second": 866.095, "eval_steps_per_second": 54.806, "step": 603 }, { "epoch": 9.04, "learning_rate": 4.1946914153096795e-05, "loss": 0.0838, "step": 606 }, { "epoch": 9.09, "learning_rate": 4.149962257694315e-05, "loss": 0.0915, "step": 609 }, { "epoch": 9.13, "learning_rate": 4.105303043422753e-05, "loss": 0.0818, "step": 612 }, { "epoch": 9.18, "learning_rate": 4.060717447172743e-05, "loss": 0.0508, "step": 615 }, { "epoch": 9.22, "learning_rate": 4.0162091375645493e-05, "loss": 0.1156, "step": 618 }, { "epoch": 9.27, "learning_rate": 3.971781776859093e-05, "loss": 0.0697, "step": 621 }, { "epoch": 9.31, "learning_rate": 3.9274390206565956e-05, "loss": 0.0563, "step": 624 }, { "epoch": 9.36, "learning_rate": 3.883184517595807e-05, "loss": 0.0993, "step": 627 }, { "epoch": 9.4, "learning_rate": 3.83902190905377e-05, "loss": 0.0621, "step": 630 }, { "epoch": 9.45, "learning_rate": 3.794954828846208e-05, "loss": 0.0788, "step": 633 }, { "epoch": 9.49, "learning_rate": 3.7509869029285215e-05, "loss": 0.1224, "step": 636 }, { "epoch": 9.54, "learning_rate": 3.707121749097431e-05, "loss": 0.0759, "step": 639 }, { "epoch": 9.58, "learning_rate": 3.663362976693304e-05, "loss": 0.0925, "step": 642 }, { "epoch": 9.63, "learning_rate": 3.619714186303162e-05, "loss": 0.0908, "step": 645 }, { "epoch": 9.67, "learning_rate": 3.576178969464414e-05, "loss": 0.12, "step": 648 }, { "epoch": 9.72, "learning_rate": 3.532760908369344e-05, "loss": 0.0629, "step": 651 }, { "epoch": 9.76, "learning_rate": 3.489463575570349e-05, "loss": 0.0506, "step": 654 }, { "epoch": 9.81, "learning_rate": 3.446290533685984e-05, "loss": 0.0738, "step": 657 }, { "epoch": 9.85, "learning_rate": 3.403245335107822e-05, "loss": 0.0586, "step": 660 }, { "epoch": 9.9, "learning_rate": 3.360331521708149e-05, "loss": 0.106, "step": 663 }, { "epoch": 9.94, "learning_rate": 3.3175526245485366e-05, "loss": 0.111, "step": 666 }, { "epoch": 9.99, "learning_rate": 3.274912163589291e-05, "loss": 0.1111, "step": 669 }, { "epoch": 10.0, "eval_loss": 0.8173357844352722, "eval_matthews_correlation": 0.5074384885743003, "eval_runtime": 1.0523, "eval_samples_per_second": 991.19, "eval_steps_per_second": 62.722, "step": 670 }, { "epoch": 10.03, "learning_rate": 3.2324136473998204e-05, "loss": 0.0763, "step": 672 }, { "epoch": 10.07, "learning_rate": 3.190060572869948e-05, "loss": 0.07, "step": 675 }, { "epoch": 10.12, "learning_rate": 3.147856424922174e-05, "loss": 0.051, "step": 678 }, { "epoch": 10.16, "learning_rate": 3.1058046762249224e-05, "loss": 0.0454, "step": 681 }, { "epoch": 10.21, "learning_rate": 3.063908786906812e-05, "loss": 0.0952, "step": 684 }, { "epoch": 10.25, "learning_rate": 3.022172204271938e-05, "loss": 0.0582, "step": 687 }, { "epoch": 10.3, "learning_rate": 2.9805983625162227e-05, "loss": 0.0653, "step": 690 }, { "epoch": 10.34, "learning_rate": 2.93919068244484e-05, "loss": 0.0617, "step": 693 }, { "epoch": 10.39, "learning_rate": 2.897952571190743e-05, "loss": 0.0648, "step": 696 }, { "epoch": 10.43, "learning_rate": 2.8568874219343155e-05, "loss": 0.0503, "step": 699 }, { "epoch": 10.48, "learning_rate": 2.8159986136241732e-05, "loss": 0.1065, "step": 702 }, { "epoch": 10.52, "learning_rate": 2.7752895106991384e-05, "loss": 0.1168, "step": 705 }, { "epoch": 10.57, "learning_rate": 2.7347634628113916e-05, "loss": 0.0471, "step": 708 }, { "epoch": 10.61, "learning_rate": 2.6944238045508708e-05, "loss": 0.0782, "step": 711 }, { "epoch": 10.66, "learning_rate": 2.6542738551708828e-05, "loss": 0.0773, "step": 714 }, { "epoch": 10.7, "learning_rate": 2.6143169183149874e-05, "loss": 0.0629, "step": 717 }, { "epoch": 10.75, "learning_rate": 2.5745562817451686e-05, "loss": 0.0662, "step": 720 }, { "epoch": 10.79, "learning_rate": 2.5349952170712977e-05, "loss": 0.0882, "step": 723 }, { "epoch": 10.84, "learning_rate": 2.4956369794819535e-05, "loss": 0.0508, "step": 726 }, { "epoch": 10.88, "learning_rate": 2.4564848074765674e-05, "loss": 0.0855, "step": 729 }, { "epoch": 10.93, "learning_rate": 2.417541922598945e-05, "loss": 0.0617, "step": 732 }, { "epoch": 10.97, "learning_rate": 2.378811529172203e-05, "loss": 0.1332, "step": 735 }, { "epoch": 11.0, "eval_loss": 0.8252650499343872, "eval_matthews_correlation": 0.5260499940984096, "eval_runtime": 1.0581, "eval_samples_per_second": 985.733, "eval_steps_per_second": 62.376, "step": 737 }, { "epoch": 11.01, "learning_rate": 2.340296814035101e-05, "loss": 0.0843, "step": 738 }, { "epoch": 11.06, "learning_rate": 2.3020009462798163e-05, "loss": 0.0988, "step": 741 }, { "epoch": 11.1, "learning_rate": 2.263927076991193e-05, "loss": 0.0605, "step": 744 }, { "epoch": 11.15, "learning_rate": 2.226078338987453e-05, "loss": 0.0337, "step": 747 }, { "epoch": 11.19, "learning_rate": 2.1884578465624257e-05, "loss": 0.0412, "step": 750 }, { "epoch": 11.24, "learning_rate": 2.1510686952292934e-05, "loss": 0.0611, "step": 753 }, { "epoch": 11.28, "learning_rate": 2.1139139614658798e-05, "loss": 0.0587, "step": 756 }, { "epoch": 11.33, "learning_rate": 2.0769967024615183e-05, "loss": 0.0204, "step": 759 }, { "epoch": 11.37, "learning_rate": 2.0403199558654945e-05, "loss": 0.1346, "step": 762 }, { "epoch": 11.42, "learning_rate": 2.0038867395370936e-05, "loss": 0.0577, "step": 765 }, { "epoch": 11.46, "learning_rate": 1.967700051297295e-05, "loss": 0.0754, "step": 768 }, { "epoch": 11.51, "learning_rate": 1.931762868682098e-05, "loss": 0.0537, "step": 771 }, { "epoch": 11.55, "learning_rate": 1.8960781486975143e-05, "loss": 0.0334, "step": 774 }, { "epoch": 11.6, "learning_rate": 1.860648827576278e-05, "loss": 0.0651, "step": 777 }, { "epoch": 11.64, "learning_rate": 1.8254778205362206e-05, "loss": 0.0742, "step": 780 }, { "epoch": 11.69, "learning_rate": 1.7905680215404174e-05, "loss": 0.1158, "step": 783 }, { "epoch": 11.73, "learning_rate": 1.7559223030590577e-05, "loss": 0.0916, "step": 786 }, { "epoch": 11.78, "learning_rate": 1.7215435158330855e-05, "loss": 0.0286, "step": 789 }, { "epoch": 11.82, "learning_rate": 1.6874344886396438e-05, "loss": 0.0687, "step": 792 }, { "epoch": 11.87, "learning_rate": 1.65359802805931e-05, "loss": 0.0705, "step": 795 }, { "epoch": 11.91, "learning_rate": 1.6200369182451564e-05, "loss": 0.0717, "step": 798 }, { "epoch": 11.96, "learning_rate": 1.586753920693676e-05, "loss": 0.0445, "step": 801 }, { "epoch": 12.0, "learning_rate": 1.553751774017551e-05, "loss": 0.0673, "step": 804 }, { "epoch": 12.0, "eval_loss": 0.8085535764694214, "eval_matthews_correlation": 0.5179780196184617, "eval_runtime": 1.0516, "eval_samples_per_second": 991.801, "eval_steps_per_second": 62.76, "step": 804 }, { "epoch": 12.04, "learning_rate": 1.5210331937203088e-05, "loss": 0.094, "step": 807 }, { "epoch": 12.09, "learning_rate": 1.4886008719728989e-05, "loss": 0.0523, "step": 810 }, { "epoch": 12.13, "learning_rate": 1.4564574773921514e-05, "loss": 0.0667, "step": 813 }, { "epoch": 12.18, "learning_rate": 1.4246056548212172e-05, "loss": 0.0533, "step": 816 }, { "epoch": 12.22, "learning_rate": 1.3930480251119321e-05, "loss": 0.0358, "step": 819 }, { "epoch": 12.27, "learning_rate": 1.3617871849091657e-05, "loss": 0.0532, "step": 822 }, { "epoch": 12.31, "learning_rate": 1.3308257064371677e-05, "loss": 0.0208, "step": 825 }, { "epoch": 12.36, "learning_rate": 1.3001661372879192e-05, "loss": 0.0506, "step": 828 }, { "epoch": 12.4, "learning_rate": 1.2698110002115004e-05, "loss": 0.0827, "step": 831 }, { "epoch": 12.45, "learning_rate": 1.2397627929085248e-05, "loss": 0.0764, "step": 834 }, { "epoch": 12.49, "learning_rate": 1.2100239878246133e-05, "loss": 0.0863, "step": 837 }, { "epoch": 12.54, "learning_rate": 1.1805970319469589e-05, "loss": 0.0572, "step": 840 }, { "epoch": 12.58, "learning_rate": 1.1514843466029807e-05, "loss": 0.103, "step": 843 }, { "epoch": 12.63, "learning_rate": 1.1226883272610877e-05, "loss": 0.0234, "step": 846 }, { "epoch": 12.67, "learning_rate": 1.0942113433335788e-05, "loss": 0.0521, "step": 849 }, { "epoch": 12.72, "learning_rate": 1.0660557379816816e-05, "loss": 0.0459, "step": 852 }, { "epoch": 12.76, "learning_rate": 1.0382238279227419e-05, "loss": 0.0695, "step": 855 }, { "epoch": 12.81, "learning_rate": 1.0107179032396086e-05, "loss": 0.076, "step": 858 }, { "epoch": 12.85, "learning_rate": 9.835402271921974e-06, "loss": 0.0414, "step": 861 }, { "epoch": 12.9, "learning_rate": 9.56693036031256e-06, "loss": 0.0643, "step": 864 }, { "epoch": 12.94, "learning_rate": 9.301785388143697e-06, "loss": 0.0413, "step": 867 }, { "epoch": 12.99, "learning_rate": 9.039989172241886e-06, "loss": 0.0512, "step": 870 }, { "epoch": 13.0, "eval_loss": 0.8409435749053955, "eval_matthews_correlation": 0.5127766293248668, "eval_runtime": 1.0686, "eval_samples_per_second": 976.072, "eval_steps_per_second": 61.765, "step": 871 }, { "epoch": 13.03, "learning_rate": 8.781563253889164e-06, "loss": 0.0642, "step": 873 }, { "epoch": 13.07, "learning_rate": 8.52652889705059e-06, "loss": 0.0699, "step": 876 }, { "epoch": 13.12, "learning_rate": 8.274907086624594e-06, "loss": 0.0431, "step": 879 }, { "epoch": 13.16, "learning_rate": 8.026718526716342e-06, "loss": 0.035, "step": 882 }, { "epoch": 13.21, "learning_rate": 7.781983638934092e-06, "loss": 0.0535, "step": 885 }, { "epoch": 13.25, "learning_rate": 7.5407225607088405e-06, "loss": 0.0553, "step": 888 }, { "epoch": 13.3, "learning_rate": 7.302955143637419e-06, "loss": 0.0515, "step": 891 }, { "epoch": 13.34, "learning_rate": 7.068700951849011e-06, "loss": 0.047, "step": 894 }, { "epoch": 13.39, "learning_rate": 6.837979260395349e-06, "loss": 0.0787, "step": 897 }, { "epoch": 13.43, "learning_rate": 6.610809053664768e-06, "loss": 0.0559, "step": 900 }, { "epoch": 13.48, "learning_rate": 6.387209023820073e-06, "loss": 0.0669, "step": 903 }, { "epoch": 13.52, "learning_rate": 6.1671975692605185e-06, "loss": 0.0828, "step": 906 }, { "epoch": 13.57, "learning_rate": 5.950792793107934e-06, "loss": 0.0373, "step": 909 }, { "epoch": 13.61, "learning_rate": 5.738012501717144e-06, "loss": 0.0586, "step": 912 }, { "epoch": 13.66, "learning_rate": 5.528874203210827e-06, "loss": 0.034, "step": 915 }, { "epoch": 13.7, "learning_rate": 5.323395106038909e-06, "loss": 0.0595, "step": 918 }, { "epoch": 13.75, "learning_rate": 5.121592117562573e-06, "loss": 0.0788, "step": 921 }, { "epoch": 13.79, "learning_rate": 4.923481842663114e-06, "loss": 0.0449, "step": 924 }, { "epoch": 13.84, "learning_rate": 4.729080582375633e-06, "loss": 0.0459, "step": 927 }, { "epoch": 13.88, "learning_rate": 4.538404332547719e-06, "loss": 0.0721, "step": 930 }, { "epoch": 13.93, "learning_rate": 4.351468782523316e-06, "loss": 0.1029, "step": 933 }, { "epoch": 13.97, "learning_rate": 4.168289313851731e-06, "loss": 0.0457, "step": 936 }, { "epoch": 14.0, "eval_loss": 0.8759517073631287, "eval_matthews_correlation": 0.4946640161033145, "eval_runtime": 1.079, "eval_samples_per_second": 966.594, "eval_steps_per_second": 61.165, "step": 938 }, { "epoch": 14.01, "learning_rate": 3.9888809990219985e-06, "loss": 0.0621, "step": 939 }, { "epoch": 14.06, "learning_rate": 3.8132586002227076e-06, "loss": 0.0333, "step": 942 }, { "epoch": 14.1, "learning_rate": 3.641436568127271e-06, "loss": 0.0461, "step": 945 }, { "epoch": 14.15, "learning_rate": 3.4734290407049397e-06, "loss": 0.0532, "step": 948 }, { "epoch": 14.19, "learning_rate": 3.309249842057499e-06, "loss": 0.044, "step": 951 }, { "epoch": 14.24, "learning_rate": 3.148912481281713e-06, "loss": 0.0556, "step": 954 }, { "epoch": 14.28, "learning_rate": 2.99243015135785e-06, "loss": 0.0417, "step": 957 }, { "epoch": 14.33, "learning_rate": 2.839815728064077e-06, "loss": 0.0593, "step": 960 }, { "epoch": 14.37, "learning_rate": 2.6910817689169922e-06, "loss": 0.0653, "step": 963 }, { "epoch": 14.42, "learning_rate": 2.5462405121384423e-06, "loss": 0.0558, "step": 966 }, { "epoch": 14.46, "learning_rate": 2.405303875648418e-06, "loss": 0.077, "step": 969 }, { "epoch": 14.51, "learning_rate": 2.268283456084491e-06, "loss": 0.0763, "step": 972 }, { "epoch": 14.55, "learning_rate": 2.135190527847608e-06, "loss": 0.075, "step": 975 }, { "epoch": 14.6, "learning_rate": 2.0060360421743584e-06, "loss": 0.0172, "step": 978 }, { "epoch": 14.64, "learning_rate": 1.8808306262359243e-06, "loss": 0.034, "step": 981 }, { "epoch": 14.69, "learning_rate": 1.7595845822636293e-06, "loss": 0.0643, "step": 984 }, { "epoch": 14.73, "learning_rate": 1.6423078867012386e-06, "loss": 0.0568, "step": 987 }, { "epoch": 14.78, "learning_rate": 1.529010189384078e-06, "loss": 0.0609, "step": 990 }, { "epoch": 14.82, "learning_rate": 1.419700812745045e-06, "loss": 0.0454, "step": 993 }, { "epoch": 14.87, "learning_rate": 1.3143887510474629e-06, "loss": 0.0688, "step": 996 }, { "epoch": 14.91, "learning_rate": 1.2130826696450992e-06, "loss": 0.0528, "step": 999 }, { "epoch": 14.96, "learning_rate": 1.1157909042690928e-06, "loss": 0.0491, "step": 1002 }, { "epoch": 15.0, "learning_rate": 1.0225214603420851e-06, "loss": 0.04, "step": 1005 }, { "epoch": 15.0, "eval_loss": 0.8521906137466431, "eval_matthews_correlation": 0.5103199460790546, "eval_runtime": 1.1012, "eval_samples_per_second": 947.135, "eval_steps_per_second": 59.934, "step": 1005 }, { "epoch": 15.04, "learning_rate": 9.332820123195418e-07, "loss": 0.0588, "step": 1008 }, { "epoch": 15.09, "learning_rate": 8.480799030582187e-07, "loss": 0.0311, "step": 1011 }, { "epoch": 15.13, "learning_rate": 7.669221432120288e-07, "loss": 0.0655, "step": 1014 }, { "epoch": 15.18, "learning_rate": 6.898154106551702e-07, "loss": 0.0682, "step": 1017 }, { "epoch": 15.22, "learning_rate": 6.167660499326322e-07, "loss": 0.0736, "step": 1020 }, { "epoch": 15.27, "learning_rate": 5.477800717381687e-07, "loss": 0.0778, "step": 1023 }, { "epoch": 15.31, "learning_rate": 4.828631524197325e-07, "loss": 0.0917, "step": 1026 }, { "epoch": 15.36, "learning_rate": 4.2202063351238774e-07, "loss": 0.0468, "step": 1029 }, { "epoch": 15.4, "learning_rate": 3.652575212987952e-07, "loss": 0.0243, "step": 1032 }, { "epoch": 15.45, "learning_rate": 3.1257848639730803e-07, "loss": 0.0767, "step": 1035 }, { "epoch": 15.49, "learning_rate": 2.6398786337762936e-07, "loss": 0.0509, "step": 1038 }, { "epoch": 15.54, "learning_rate": 2.1948965040417547e-07, "loss": 0.0546, "step": 1041 }, { "epoch": 15.58, "learning_rate": 1.790875089070887e-07, "loss": 0.0451, "step": 1044 }, { "epoch": 15.63, "learning_rate": 1.427847632809509e-07, "loss": 0.0781, "step": 1047 }, { "epoch": 15.67, "learning_rate": 1.1058440061127439e-07, "loss": 0.0233, "step": 1050 }, { "epoch": 15.72, "learning_rate": 8.248907042868737e-08, "loss": 0.0393, "step": 1053 }, { "epoch": 15.76, "learning_rate": 5.850108449094727e-08, "loss": 0.0612, "step": 1056 }, { "epoch": 15.81, "learning_rate": 3.862241659269294e-08, "loss": 0.0476, "step": 1059 }, { "epoch": 15.85, "learning_rate": 2.2854702403068972e-08, "loss": 0.0283, "step": 1062 }, { "epoch": 15.9, "learning_rate": 1.1199239331122214e-08, "loss": 0.0482, "step": 1065 }, { "epoch": 15.94, "learning_rate": 3.6569864190427738e-09, "loss": 0.052, "step": 1068 }, { "epoch": 15.99, "learning_rate": 2.2856426326045565e-10, "loss": 0.0485, "step": 1071 }, { "epoch": 16.0, "eval_loss": 0.8556408882141113, "eval_matthews_correlation": 0.5076423377649488, "eval_runtime": 1.1842, "eval_samples_per_second": 880.772, "eval_steps_per_second": 55.734, "step": 1072 }, { "epoch": 16.0, "step": 1072, "total_flos": 2253110839083008.0, "train_loss": 0.1824943411264763, "train_runtime": 434.1785, "train_samples_per_second": 315.115, "train_steps_per_second": 2.469 } ], "max_steps": 1072, "num_train_epochs": 16, "total_flos": 2253110839083008.0, "trial_name": null, "trial_params": null }