pszemraj's picture
End of training
64ec365
{
"best_metric": 0.5395539646127814,
"best_model_checkpoint": "./runtime-text-classification/xtremedistil-l12-h384-uncased-CoLA/checkpoint-268",
"epoch": 16.0,
"global_step": 1072,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 9.090909090909091e-06,
"loss": 0.6853,
"step": 3
},
{
"epoch": 0.09,
"learning_rate": 1.8181818181818182e-05,
"loss": 0.6765,
"step": 6
},
{
"epoch": 0.13,
"learning_rate": 2.7272727272727273e-05,
"loss": 0.6581,
"step": 9
},
{
"epoch": 0.18,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.6444,
"step": 12
},
{
"epoch": 0.22,
"learning_rate": 4.545454545454546e-05,
"loss": 0.6218,
"step": 15
},
{
"epoch": 0.27,
"learning_rate": 5.4545454545454546e-05,
"loss": 0.6467,
"step": 18
},
{
"epoch": 0.31,
"learning_rate": 6.363636363636364e-05,
"loss": 0.594,
"step": 21
},
{
"epoch": 0.36,
"learning_rate": 7.272727272727273e-05,
"loss": 0.6193,
"step": 24
},
{
"epoch": 0.4,
"learning_rate": 8.181818181818183e-05,
"loss": 0.6009,
"step": 27
},
{
"epoch": 0.45,
"learning_rate": 9.090909090909092e-05,
"loss": 0.5942,
"step": 30
},
{
"epoch": 0.49,
"learning_rate": 0.0001,
"loss": 0.5839,
"step": 33
},
{
"epoch": 0.54,
"learning_rate": 9.999794293416862e-05,
"loss": 0.5991,
"step": 36
},
{
"epoch": 0.58,
"learning_rate": 9.999177190593525e-05,
"loss": 0.5808,
"step": 39
},
{
"epoch": 0.63,
"learning_rate": 9.998148742306837e-05,
"loss": 0.5599,
"step": 42
},
{
"epoch": 0.67,
"learning_rate": 9.996709033180229e-05,
"loss": 0.5859,
"step": 45
},
{
"epoch": 0.72,
"learning_rate": 9.99485818167676e-05,
"loss": 0.5573,
"step": 48
},
{
"epoch": 0.76,
"learning_rate": 9.992596340089365e-05,
"loss": 0.5645,
"step": 51
},
{
"epoch": 0.81,
"learning_rate": 9.989923694528327e-05,
"loss": 0.513,
"step": 54
},
{
"epoch": 0.85,
"learning_rate": 9.986840464905958e-05,
"loss": 0.5347,
"step": 57
},
{
"epoch": 0.9,
"learning_rate": 9.983346904918513e-05,
"loss": 0.5333,
"step": 60
},
{
"epoch": 0.94,
"learning_rate": 9.979443302025305e-05,
"loss": 0.5549,
"step": 63
},
{
"epoch": 0.99,
"learning_rate": 9.975129977425062e-05,
"loss": 0.4822,
"step": 66
},
{
"epoch": 1.0,
"eval_loss": 0.5893396139144897,
"eval_matthews_correlation": 0.26208039166807634,
"eval_runtime": 1.0615,
"eval_samples_per_second": 982.602,
"eval_steps_per_second": 62.178,
"step": 67
},
{
"epoch": 1.03,
"learning_rate": 9.970407286029487e-05,
"loss": 0.4866,
"step": 69
},
{
"epoch": 1.07,
"learning_rate": 9.965275616434067e-05,
"loss": 0.4771,
"step": 72
},
{
"epoch": 1.12,
"learning_rate": 9.959735390886086e-05,
"loss": 0.506,
"step": 75
},
{
"epoch": 1.16,
"learning_rate": 9.953787065249896e-05,
"loss": 0.5117,
"step": 78
},
{
"epoch": 1.21,
"learning_rate": 9.947431128969387e-05,
"loss": 0.4754,
"step": 81
},
{
"epoch": 1.25,
"learning_rate": 9.940668105027739e-05,
"loss": 0.5221,
"step": 84
},
{
"epoch": 1.3,
"learning_rate": 9.933498549904368e-05,
"loss": 0.4806,
"step": 87
},
{
"epoch": 1.34,
"learning_rate": 9.925923053529147e-05,
"loss": 0.4426,
"step": 90
},
{
"epoch": 1.39,
"learning_rate": 9.917942239233871e-05,
"loss": 0.5009,
"step": 93
},
{
"epoch": 1.43,
"learning_rate": 9.909556763700951e-05,
"loss": 0.4601,
"step": 96
},
{
"epoch": 1.48,
"learning_rate": 9.900767316909396e-05,
"loss": 0.4366,
"step": 99
},
{
"epoch": 1.52,
"learning_rate": 9.891574622078034e-05,
"loss": 0.4403,
"step": 102
},
{
"epoch": 1.57,
"learning_rate": 9.881979435606003e-05,
"loss": 0.3992,
"step": 105
},
{
"epoch": 1.61,
"learning_rate": 9.871982547010513e-05,
"loss": 0.464,
"step": 108
},
{
"epoch": 1.66,
"learning_rate": 9.861584778861878e-05,
"loss": 0.4349,
"step": 111
},
{
"epoch": 1.7,
"learning_rate": 9.850786986715846e-05,
"loss": 0.4249,
"step": 114
},
{
"epoch": 1.75,
"learning_rate": 9.839590059043184e-05,
"loss": 0.4344,
"step": 117
},
{
"epoch": 1.79,
"learning_rate": 9.827994917156587e-05,
"loss": 0.4342,
"step": 120
},
{
"epoch": 1.84,
"learning_rate": 9.816002515134865e-05,
"loss": 0.4304,
"step": 123
},
{
"epoch": 1.88,
"learning_rate": 9.80361383974443e-05,
"loss": 0.4584,
"step": 126
},
{
"epoch": 1.93,
"learning_rate": 9.790829910358122e-05,
"loss": 0.4151,
"step": 129
},
{
"epoch": 1.97,
"learning_rate": 9.777651778871309e-05,
"loss": 0.4669,
"step": 132
},
{
"epoch": 2.0,
"eval_loss": 0.5811270475387573,
"eval_matthews_correlation": 0.3721681506432673,
"eval_runtime": 1.0734,
"eval_samples_per_second": 971.718,
"eval_steps_per_second": 61.489,
"step": 134
},
{
"epoch": 2.01,
"learning_rate": 9.764080529615351e-05,
"loss": 0.4085,
"step": 135
},
{
"epoch": 2.06,
"learning_rate": 9.750117279268378e-05,
"loss": 0.3755,
"step": 138
},
{
"epoch": 2.1,
"learning_rate": 9.735763176763394e-05,
"loss": 0.3877,
"step": 141
},
{
"epoch": 2.15,
"learning_rate": 9.721019403193753e-05,
"loss": 0.3769,
"step": 144
},
{
"epoch": 2.19,
"learning_rate": 9.705887171715966e-05,
"loss": 0.3468,
"step": 147
},
{
"epoch": 2.24,
"learning_rate": 9.690367727449888e-05,
"loss": 0.3736,
"step": 150
},
{
"epoch": 2.28,
"learning_rate": 9.674462347376259e-05,
"loss": 0.3744,
"step": 153
},
{
"epoch": 2.33,
"learning_rate": 9.658172340231635e-05,
"loss": 0.3203,
"step": 156
},
{
"epoch": 2.37,
"learning_rate": 9.6414990464007e-05,
"loss": 0.3398,
"step": 159
},
{
"epoch": 2.42,
"learning_rate": 9.624443837805972e-05,
"loss": 0.3255,
"step": 162
},
{
"epoch": 2.46,
"learning_rate": 9.607008117794928e-05,
"loss": 0.349,
"step": 165
},
{
"epoch": 2.51,
"learning_rate": 9.589193321024524e-05,
"loss": 0.4708,
"step": 168
},
{
"epoch": 2.55,
"learning_rate": 9.571000913343148e-05,
"loss": 0.3565,
"step": 171
},
{
"epoch": 2.6,
"learning_rate": 9.552432391670009e-05,
"loss": 0.3502,
"step": 174
},
{
"epoch": 2.64,
"learning_rate": 9.533489283871966e-05,
"loss": 0.3605,
"step": 177
},
{
"epoch": 2.69,
"learning_rate": 9.51417314863781e-05,
"loss": 0.4047,
"step": 180
},
{
"epoch": 2.73,
"learning_rate": 9.494485575350014e-05,
"loss": 0.3681,
"step": 183
},
{
"epoch": 2.78,
"learning_rate": 9.474428183953951e-05,
"loss": 0.4035,
"step": 186
},
{
"epoch": 2.82,
"learning_rate": 9.454002624824598e-05,
"loss": 0.3704,
"step": 189
},
{
"epoch": 2.87,
"learning_rate": 9.433210578630749e-05,
"loss": 0.3648,
"step": 192
},
{
"epoch": 2.91,
"learning_rate": 9.412053756196715e-05,
"loss": 0.3498,
"step": 195
},
{
"epoch": 2.96,
"learning_rate": 9.390533898361555e-05,
"loss": 0.3759,
"step": 198
},
{
"epoch": 3.0,
"learning_rate": 9.368652775835843e-05,
"loss": 0.3077,
"step": 201
},
{
"epoch": 3.0,
"eval_loss": 0.615044891834259,
"eval_matthews_correlation": 0.4382589402952836,
"eval_runtime": 1.0332,
"eval_samples_per_second": 1009.479,
"eval_steps_per_second": 63.879,
"step": 201
},
{
"epoch": 3.04,
"learning_rate": 9.346412189055955e-05,
"loss": 0.2746,
"step": 204
},
{
"epoch": 3.09,
"learning_rate": 9.323813968035936e-05,
"loss": 0.242,
"step": 207
},
{
"epoch": 3.13,
"learning_rate": 9.300859972216924e-05,
"loss": 0.2428,
"step": 210
},
{
"epoch": 3.18,
"learning_rate": 9.277552090314135e-05,
"loss": 0.337,
"step": 213
},
{
"epoch": 3.22,
"learning_rate": 9.253892240161466e-05,
"loss": 0.2882,
"step": 216
},
{
"epoch": 3.27,
"learning_rate": 9.229882368553692e-05,
"loss": 0.2743,
"step": 219
},
{
"epoch": 3.31,
"learning_rate": 9.205524451086274e-05,
"loss": 0.2693,
"step": 222
},
{
"epoch": 3.36,
"learning_rate": 9.180820491992799e-05,
"loss": 0.2557,
"step": 225
},
{
"epoch": 3.4,
"learning_rate": 9.155772523980075e-05,
"loss": 0.296,
"step": 228
},
{
"epoch": 3.45,
"learning_rate": 9.130382608060868e-05,
"loss": 0.2739,
"step": 231
},
{
"epoch": 3.49,
"learning_rate": 9.104652833384317e-05,
"loss": 0.2848,
"step": 234
},
{
"epoch": 3.54,
"learning_rate": 9.078585317064036e-05,
"loss": 0.2613,
"step": 237
},
{
"epoch": 3.58,
"learning_rate": 9.052182204003909e-05,
"loss": 0.2775,
"step": 240
},
{
"epoch": 3.63,
"learning_rate": 9.025445666721608e-05,
"loss": 0.2558,
"step": 243
},
{
"epoch": 3.67,
"learning_rate": 8.998377905169822e-05,
"loss": 0.3344,
"step": 246
},
{
"epoch": 3.72,
"learning_rate": 8.970981146555247e-05,
"loss": 0.3003,
"step": 249
},
{
"epoch": 3.76,
"learning_rate": 8.943257645155327e-05,
"loss": 0.2598,
"step": 252
},
{
"epoch": 3.81,
"learning_rate": 8.91520968213276e-05,
"loss": 0.2584,
"step": 255
},
{
"epoch": 3.85,
"learning_rate": 8.886839565347797e-05,
"loss": 0.2581,
"step": 258
},
{
"epoch": 3.9,
"learning_rate": 8.858149629168357e-05,
"loss": 0.3605,
"step": 261
},
{
"epoch": 3.94,
"learning_rate": 8.829142234277936e-05,
"loss": 0.3091,
"step": 264
},
{
"epoch": 3.99,
"learning_rate": 8.79981976748137e-05,
"loss": 0.2594,
"step": 267
},
{
"epoch": 4.0,
"eval_loss": 0.49737870693206787,
"eval_matthews_correlation": 0.5395539646127814,
"eval_runtime": 1.0788,
"eval_samples_per_second": 966.811,
"eval_steps_per_second": 61.179,
"step": 268
},
{
"epoch": 4.03,
"learning_rate": 8.770184641508439e-05,
"loss": 0.2586,
"step": 270
},
{
"epoch": 4.07,
"learning_rate": 8.740239294815345e-05,
"loss": 0.2208,
"step": 273
},
{
"epoch": 4.12,
"learning_rate": 8.70998619138407e-05,
"loss": 0.1708,
"step": 276
},
{
"epoch": 4.16,
"learning_rate": 8.679427820519625e-05,
"loss": 0.2191,
"step": 279
},
{
"epoch": 4.21,
"learning_rate": 8.648566696645233e-05,
"loss": 0.2213,
"step": 282
},
{
"epoch": 4.25,
"learning_rate": 8.617405359095437e-05,
"loss": 0.2575,
"step": 285
},
{
"epoch": 4.3,
"learning_rate": 8.585946371907138e-05,
"loss": 0.2006,
"step": 288
},
{
"epoch": 4.34,
"learning_rate": 8.55419232360865e-05,
"loss": 0.2023,
"step": 291
},
{
"epoch": 4.39,
"learning_rate": 8.522145827006675e-05,
"loss": 0.1837,
"step": 294
},
{
"epoch": 4.43,
"learning_rate": 8.489809518971348e-05,
"loss": 0.2486,
"step": 297
},
{
"epoch": 4.48,
"learning_rate": 8.457186060219239e-05,
"loss": 0.2044,
"step": 300
},
{
"epoch": 4.52,
"learning_rate": 8.42427813509444e-05,
"loss": 0.1813,
"step": 303
},
{
"epoch": 4.57,
"learning_rate": 8.391088451347688e-05,
"loss": 0.2795,
"step": 306
},
{
"epoch": 4.61,
"learning_rate": 8.357619739913557e-05,
"loss": 0.1769,
"step": 309
},
{
"epoch": 4.66,
"learning_rate": 8.323874754685755e-05,
"loss": 0.1875,
"step": 312
},
{
"epoch": 4.7,
"learning_rate": 8.289856272290527e-05,
"loss": 0.2291,
"step": 315
},
{
"epoch": 4.75,
"learning_rate": 8.255567091858182e-05,
"loss": 0.2097,
"step": 318
},
{
"epoch": 4.79,
"learning_rate": 8.22101003479278e-05,
"loss": 0.2009,
"step": 321
},
{
"epoch": 4.84,
"learning_rate": 8.186187944539973e-05,
"loss": 0.2932,
"step": 324
},
{
"epoch": 4.88,
"learning_rate": 8.151103686353042e-05,
"loss": 0.2447,
"step": 327
},
{
"epoch": 4.93,
"learning_rate": 8.115760147057138e-05,
"loss": 0.202,
"step": 330
},
{
"epoch": 4.97,
"learning_rate": 8.080160234811742e-05,
"loss": 0.21,
"step": 333
},
{
"epoch": 5.0,
"eval_loss": 0.5594205856323242,
"eval_matthews_correlation": 0.5181917740456299,
"eval_runtime": 1.065,
"eval_samples_per_second": 979.357,
"eval_steps_per_second": 61.973,
"step": 335
},
{
"epoch": 5.01,
"learning_rate": 8.044306878871375e-05,
"loss": 0.2023,
"step": 336
},
{
"epoch": 5.06,
"learning_rate": 8.00820302934458e-05,
"loss": 0.1664,
"step": 339
},
{
"epoch": 5.1,
"learning_rate": 7.971851656951161e-05,
"loss": 0.2118,
"step": 342
},
{
"epoch": 5.15,
"learning_rate": 7.935255752777764e-05,
"loss": 0.1459,
"step": 345
},
{
"epoch": 5.19,
"learning_rate": 7.898418328031752e-05,
"loss": 0.1239,
"step": 348
},
{
"epoch": 5.24,
"learning_rate": 7.861342413793433e-05,
"loss": 0.2143,
"step": 351
},
{
"epoch": 5.28,
"learning_rate": 7.824031060766662e-05,
"loss": 0.1456,
"step": 354
},
{
"epoch": 5.33,
"learning_rate": 7.786487339027815e-05,
"loss": 0.1705,
"step": 357
},
{
"epoch": 5.37,
"learning_rate": 7.748714337773179e-05,
"loss": 0.1135,
"step": 360
},
{
"epoch": 5.42,
"learning_rate": 7.710715165064765e-05,
"loss": 0.2111,
"step": 363
},
{
"epoch": 5.46,
"learning_rate": 7.672492947574566e-05,
"loss": 0.2572,
"step": 366
},
{
"epoch": 5.51,
"learning_rate": 7.634050830327282e-05,
"loss": 0.2522,
"step": 369
},
{
"epoch": 5.55,
"learning_rate": 7.59539197644155e-05,
"loss": 0.1322,
"step": 372
},
{
"epoch": 5.6,
"learning_rate": 7.556519566869666e-05,
"loss": 0.224,
"step": 375
},
{
"epoch": 5.64,
"learning_rate": 7.517436800135853e-05,
"loss": 0.1831,
"step": 378
},
{
"epoch": 5.69,
"learning_rate": 7.47814689207307e-05,
"loss": 0.1731,
"step": 381
},
{
"epoch": 5.73,
"learning_rate": 7.438653075558412e-05,
"loss": 0.2192,
"step": 384
},
{
"epoch": 5.78,
"learning_rate": 7.398958600247103e-05,
"loss": 0.1522,
"step": 387
},
{
"epoch": 5.82,
"learning_rate": 7.359066732305095e-05,
"loss": 0.2241,
"step": 390
},
{
"epoch": 5.87,
"learning_rate": 7.318980754140326e-05,
"loss": 0.2172,
"step": 393
},
{
"epoch": 5.91,
"learning_rate": 7.278703964132639e-05,
"loss": 0.1706,
"step": 396
},
{
"epoch": 5.96,
"learning_rate": 7.238239676362372e-05,
"loss": 0.1813,
"step": 399
},
{
"epoch": 6.0,
"learning_rate": 7.197591220337679e-05,
"loss": 0.1526,
"step": 402
},
{
"epoch": 6.0,
"eval_loss": 0.5715296268463135,
"eval_matthews_correlation": 0.5149844966342378,
"eval_runtime": 1.0444,
"eval_samples_per_second": 998.702,
"eval_steps_per_second": 63.197,
"step": 402
},
{
"epoch": 6.04,
"learning_rate": 7.156761940720555e-05,
"loss": 0.1491,
"step": 405
},
{
"epoch": 6.09,
"learning_rate": 7.115755197051645e-05,
"loss": 0.1685,
"step": 408
},
{
"epoch": 6.13,
"learning_rate": 7.074574363473798e-05,
"loss": 0.1871,
"step": 411
},
{
"epoch": 6.18,
"learning_rate": 7.033222828454442e-05,
"loss": 0.1312,
"step": 414
},
{
"epoch": 6.22,
"learning_rate": 6.991703994506761e-05,
"loss": 0.1746,
"step": 417
},
{
"epoch": 6.27,
"learning_rate": 6.950021277909749e-05,
"loss": 0.1484,
"step": 420
},
{
"epoch": 6.31,
"learning_rate": 6.908178108427088e-05,
"loss": 0.1476,
"step": 423
},
{
"epoch": 6.36,
"learning_rate": 6.866177929024945e-05,
"loss": 0.1404,
"step": 426
},
{
"epoch": 6.4,
"learning_rate": 6.824024195588677e-05,
"loss": 0.1335,
"step": 429
},
{
"epoch": 6.45,
"learning_rate": 6.781720376638477e-05,
"loss": 0.1782,
"step": 432
},
{
"epoch": 6.49,
"learning_rate": 6.739269953043959e-05,
"loss": 0.1941,
"step": 435
},
{
"epoch": 6.54,
"learning_rate": 6.696676417737764e-05,
"loss": 0.1558,
"step": 438
},
{
"epoch": 6.58,
"learning_rate": 6.653943275428135e-05,
"loss": 0.1311,
"step": 441
},
{
"epoch": 6.63,
"learning_rate": 6.611074042310549e-05,
"loss": 0.128,
"step": 444
},
{
"epoch": 6.67,
"learning_rate": 6.568072245778394e-05,
"loss": 0.1374,
"step": 447
},
{
"epoch": 6.72,
"learning_rate": 6.524941424132719e-05,
"loss": 0.1491,
"step": 450
},
{
"epoch": 6.76,
"learning_rate": 6.481685126291106e-05,
"loss": 0.1242,
"step": 453
},
{
"epoch": 6.81,
"learning_rate": 6.438306911495648e-05,
"loss": 0.1379,
"step": 456
},
{
"epoch": 6.85,
"learning_rate": 6.394810349020083e-05,
"loss": 0.168,
"step": 459
},
{
"epoch": 6.9,
"learning_rate": 6.351199017876106e-05,
"loss": 0.1601,
"step": 462
},
{
"epoch": 6.94,
"learning_rate": 6.30747650651889e-05,
"loss": 0.1641,
"step": 465
},
{
"epoch": 6.99,
"learning_rate": 6.263646412551794e-05,
"loss": 0.1775,
"step": 468
},
{
"epoch": 7.0,
"eval_loss": 0.6637021899223328,
"eval_matthews_correlation": 0.5019828461798207,
"eval_runtime": 1.1986,
"eval_samples_per_second": 870.206,
"eval_steps_per_second": 55.066,
"step": 469
},
{
"epoch": 7.03,
"learning_rate": 6.219712342430371e-05,
"loss": 0.1263,
"step": 471
},
{
"epoch": 7.07,
"learning_rate": 6.175677911165599e-05,
"loss": 0.114,
"step": 474
},
{
"epoch": 7.12,
"learning_rate": 6.131546742026438e-05,
"loss": 0.0823,
"step": 477
},
{
"epoch": 7.16,
"learning_rate": 6.0873224662416896e-05,
"loss": 0.1068,
"step": 480
},
{
"epoch": 7.21,
"learning_rate": 6.04300872270122e-05,
"loss": 0.1067,
"step": 483
},
{
"epoch": 7.25,
"learning_rate": 5.998609157656539e-05,
"loss": 0.1026,
"step": 486
},
{
"epoch": 7.3,
"learning_rate": 5.954127424420773e-05,
"loss": 0.1208,
"step": 489
},
{
"epoch": 7.34,
"learning_rate": 5.9095671830680656e-05,
"loss": 0.0778,
"step": 492
},
{
"epoch": 7.39,
"learning_rate": 5.864932100132411e-05,
"loss": 0.1066,
"step": 495
},
{
"epoch": 7.43,
"learning_rate": 5.82022584830597e-05,
"loss": 0.1086,
"step": 498
},
{
"epoch": 7.48,
"learning_rate": 5.7754521061368684e-05,
"loss": 0.1833,
"step": 501
},
{
"epoch": 7.52,
"learning_rate": 5.730614557726509e-05,
"loss": 0.1193,
"step": 504
},
{
"epoch": 7.57,
"learning_rate": 5.685716892426445e-05,
"loss": 0.1628,
"step": 507
},
{
"epoch": 7.61,
"learning_rate": 5.640762804534806e-05,
"loss": 0.1774,
"step": 510
},
{
"epoch": 7.66,
"learning_rate": 5.595755992992317e-05,
"loss": 0.1434,
"step": 513
},
{
"epoch": 7.7,
"learning_rate": 5.550700161077945e-05,
"loss": 0.0957,
"step": 516
},
{
"epoch": 7.75,
"learning_rate": 5.505599016104187e-05,
"loss": 0.1219,
"step": 519
},
{
"epoch": 7.79,
"learning_rate": 5.460456269112013e-05,
"loss": 0.1562,
"step": 522
},
{
"epoch": 7.84,
"learning_rate": 5.415275634565517e-05,
"loss": 0.136,
"step": 525
},
{
"epoch": 7.88,
"learning_rate": 5.370060830046282e-05,
"loss": 0.0868,
"step": 528
},
{
"epoch": 7.93,
"learning_rate": 5.3248155759474846e-05,
"loss": 0.1028,
"step": 531
},
{
"epoch": 7.97,
"learning_rate": 5.2795435951677785e-05,
"loss": 0.1681,
"step": 534
},
{
"epoch": 8.0,
"eval_loss": 0.6957959532737732,
"eval_matthews_correlation": 0.5131045571647604,
"eval_runtime": 1.0682,
"eval_samples_per_second": 976.38,
"eval_steps_per_second": 61.784,
"step": 536
},
{
"epoch": 8.01,
"learning_rate": 5.234248612804952e-05,
"loss": 0.1163,
"step": 537
},
{
"epoch": 8.06,
"learning_rate": 5.1889343558494266e-05,
"loss": 0.1022,
"step": 540
},
{
"epoch": 8.1,
"learning_rate": 5.14360455287759e-05,
"loss": 0.0854,
"step": 543
},
{
"epoch": 8.15,
"learning_rate": 5.098262933744994e-05,
"loss": 0.112,
"step": 546
},
{
"epoch": 8.19,
"learning_rate": 5.052913229279459e-05,
"loss": 0.1067,
"step": 549
},
{
"epoch": 8.24,
"learning_rate": 5.007559170974084e-05,
"loss": 0.083,
"step": 552
},
{
"epoch": 8.28,
"learning_rate": 4.962204490680216e-05,
"loss": 0.085,
"step": 555
},
{
"epoch": 8.33,
"learning_rate": 4.9168529203003814e-05,
"loss": 0.1075,
"step": 558
},
{
"epoch": 8.37,
"learning_rate": 4.871508191481211e-05,
"loss": 0.1416,
"step": 561
},
{
"epoch": 8.42,
"learning_rate": 4.826174035306398e-05,
"loss": 0.1248,
"step": 564
},
{
"epoch": 8.46,
"learning_rate": 4.7808541819896885e-05,
"loss": 0.0748,
"step": 567
},
{
"epoch": 8.51,
"learning_rate": 4.735552360567952e-05,
"loss": 0.0707,
"step": 570
},
{
"epoch": 8.55,
"learning_rate": 4.6902722985943444e-05,
"loss": 0.0778,
"step": 573
},
{
"epoch": 8.6,
"learning_rate": 4.645017721831602e-05,
"loss": 0.1447,
"step": 576
},
{
"epoch": 8.64,
"learning_rate": 4.599792353945466e-05,
"loss": 0.0839,
"step": 579
},
{
"epoch": 8.69,
"learning_rate": 4.5545999161982953e-05,
"loss": 0.1268,
"step": 582
},
{
"epoch": 8.73,
"learning_rate": 4.509444127142871e-05,
"loss": 0.0705,
"step": 585
},
{
"epoch": 8.78,
"learning_rate": 4.464328702316427e-05,
"loss": 0.1196,
"step": 588
},
{
"epoch": 8.82,
"learning_rate": 4.419257353934915e-05,
"loss": 0.1071,
"step": 591
},
{
"epoch": 8.87,
"learning_rate": 4.374233790587565e-05,
"loss": 0.1177,
"step": 594
},
{
"epoch": 8.91,
"learning_rate": 4.329261716931727e-05,
"loss": 0.104,
"step": 597
},
{
"epoch": 8.96,
"learning_rate": 4.284344833388047e-05,
"loss": 0.0596,
"step": 600
},
{
"epoch": 9.0,
"learning_rate": 4.2394868358359774e-05,
"loss": 0.124,
"step": 603
},
{
"epoch": 9.0,
"eval_loss": 0.7057417631149292,
"eval_matthews_correlation": 0.5153742778418894,
"eval_runtime": 1.2043,
"eval_samples_per_second": 866.095,
"eval_steps_per_second": 54.806,
"step": 603
},
{
"epoch": 9.04,
"learning_rate": 4.1946914153096795e-05,
"loss": 0.0838,
"step": 606
},
{
"epoch": 9.09,
"learning_rate": 4.149962257694315e-05,
"loss": 0.0915,
"step": 609
},
{
"epoch": 9.13,
"learning_rate": 4.105303043422753e-05,
"loss": 0.0818,
"step": 612
},
{
"epoch": 9.18,
"learning_rate": 4.060717447172743e-05,
"loss": 0.0508,
"step": 615
},
{
"epoch": 9.22,
"learning_rate": 4.0162091375645493e-05,
"loss": 0.1156,
"step": 618
},
{
"epoch": 9.27,
"learning_rate": 3.971781776859093e-05,
"loss": 0.0697,
"step": 621
},
{
"epoch": 9.31,
"learning_rate": 3.9274390206565956e-05,
"loss": 0.0563,
"step": 624
},
{
"epoch": 9.36,
"learning_rate": 3.883184517595807e-05,
"loss": 0.0993,
"step": 627
},
{
"epoch": 9.4,
"learning_rate": 3.83902190905377e-05,
"loss": 0.0621,
"step": 630
},
{
"epoch": 9.45,
"learning_rate": 3.794954828846208e-05,
"loss": 0.0788,
"step": 633
},
{
"epoch": 9.49,
"learning_rate": 3.7509869029285215e-05,
"loss": 0.1224,
"step": 636
},
{
"epoch": 9.54,
"learning_rate": 3.707121749097431e-05,
"loss": 0.0759,
"step": 639
},
{
"epoch": 9.58,
"learning_rate": 3.663362976693304e-05,
"loss": 0.0925,
"step": 642
},
{
"epoch": 9.63,
"learning_rate": 3.619714186303162e-05,
"loss": 0.0908,
"step": 645
},
{
"epoch": 9.67,
"learning_rate": 3.576178969464414e-05,
"loss": 0.12,
"step": 648
},
{
"epoch": 9.72,
"learning_rate": 3.532760908369344e-05,
"loss": 0.0629,
"step": 651
},
{
"epoch": 9.76,
"learning_rate": 3.489463575570349e-05,
"loss": 0.0506,
"step": 654
},
{
"epoch": 9.81,
"learning_rate": 3.446290533685984e-05,
"loss": 0.0738,
"step": 657
},
{
"epoch": 9.85,
"learning_rate": 3.403245335107822e-05,
"loss": 0.0586,
"step": 660
},
{
"epoch": 9.9,
"learning_rate": 3.360331521708149e-05,
"loss": 0.106,
"step": 663
},
{
"epoch": 9.94,
"learning_rate": 3.3175526245485366e-05,
"loss": 0.111,
"step": 666
},
{
"epoch": 9.99,
"learning_rate": 3.274912163589291e-05,
"loss": 0.1111,
"step": 669
},
{
"epoch": 10.0,
"eval_loss": 0.8173357844352722,
"eval_matthews_correlation": 0.5074384885743003,
"eval_runtime": 1.0523,
"eval_samples_per_second": 991.19,
"eval_steps_per_second": 62.722,
"step": 670
},
{
"epoch": 10.03,
"learning_rate": 3.2324136473998204e-05,
"loss": 0.0763,
"step": 672
},
{
"epoch": 10.07,
"learning_rate": 3.190060572869948e-05,
"loss": 0.07,
"step": 675
},
{
"epoch": 10.12,
"learning_rate": 3.147856424922174e-05,
"loss": 0.051,
"step": 678
},
{
"epoch": 10.16,
"learning_rate": 3.1058046762249224e-05,
"loss": 0.0454,
"step": 681
},
{
"epoch": 10.21,
"learning_rate": 3.063908786906812e-05,
"loss": 0.0952,
"step": 684
},
{
"epoch": 10.25,
"learning_rate": 3.022172204271938e-05,
"loss": 0.0582,
"step": 687
},
{
"epoch": 10.3,
"learning_rate": 2.9805983625162227e-05,
"loss": 0.0653,
"step": 690
},
{
"epoch": 10.34,
"learning_rate": 2.93919068244484e-05,
"loss": 0.0617,
"step": 693
},
{
"epoch": 10.39,
"learning_rate": 2.897952571190743e-05,
"loss": 0.0648,
"step": 696
},
{
"epoch": 10.43,
"learning_rate": 2.8568874219343155e-05,
"loss": 0.0503,
"step": 699
},
{
"epoch": 10.48,
"learning_rate": 2.8159986136241732e-05,
"loss": 0.1065,
"step": 702
},
{
"epoch": 10.52,
"learning_rate": 2.7752895106991384e-05,
"loss": 0.1168,
"step": 705
},
{
"epoch": 10.57,
"learning_rate": 2.7347634628113916e-05,
"loss": 0.0471,
"step": 708
},
{
"epoch": 10.61,
"learning_rate": 2.6944238045508708e-05,
"loss": 0.0782,
"step": 711
},
{
"epoch": 10.66,
"learning_rate": 2.6542738551708828e-05,
"loss": 0.0773,
"step": 714
},
{
"epoch": 10.7,
"learning_rate": 2.6143169183149874e-05,
"loss": 0.0629,
"step": 717
},
{
"epoch": 10.75,
"learning_rate": 2.5745562817451686e-05,
"loss": 0.0662,
"step": 720
},
{
"epoch": 10.79,
"learning_rate": 2.5349952170712977e-05,
"loss": 0.0882,
"step": 723
},
{
"epoch": 10.84,
"learning_rate": 2.4956369794819535e-05,
"loss": 0.0508,
"step": 726
},
{
"epoch": 10.88,
"learning_rate": 2.4564848074765674e-05,
"loss": 0.0855,
"step": 729
},
{
"epoch": 10.93,
"learning_rate": 2.417541922598945e-05,
"loss": 0.0617,
"step": 732
},
{
"epoch": 10.97,
"learning_rate": 2.378811529172203e-05,
"loss": 0.1332,
"step": 735
},
{
"epoch": 11.0,
"eval_loss": 0.8252650499343872,
"eval_matthews_correlation": 0.5260499940984096,
"eval_runtime": 1.0581,
"eval_samples_per_second": 985.733,
"eval_steps_per_second": 62.376,
"step": 737
},
{
"epoch": 11.01,
"learning_rate": 2.340296814035101e-05,
"loss": 0.0843,
"step": 738
},
{
"epoch": 11.06,
"learning_rate": 2.3020009462798163e-05,
"loss": 0.0988,
"step": 741
},
{
"epoch": 11.1,
"learning_rate": 2.263927076991193e-05,
"loss": 0.0605,
"step": 744
},
{
"epoch": 11.15,
"learning_rate": 2.226078338987453e-05,
"loss": 0.0337,
"step": 747
},
{
"epoch": 11.19,
"learning_rate": 2.1884578465624257e-05,
"loss": 0.0412,
"step": 750
},
{
"epoch": 11.24,
"learning_rate": 2.1510686952292934e-05,
"loss": 0.0611,
"step": 753
},
{
"epoch": 11.28,
"learning_rate": 2.1139139614658798e-05,
"loss": 0.0587,
"step": 756
},
{
"epoch": 11.33,
"learning_rate": 2.0769967024615183e-05,
"loss": 0.0204,
"step": 759
},
{
"epoch": 11.37,
"learning_rate": 2.0403199558654945e-05,
"loss": 0.1346,
"step": 762
},
{
"epoch": 11.42,
"learning_rate": 2.0038867395370936e-05,
"loss": 0.0577,
"step": 765
},
{
"epoch": 11.46,
"learning_rate": 1.967700051297295e-05,
"loss": 0.0754,
"step": 768
},
{
"epoch": 11.51,
"learning_rate": 1.931762868682098e-05,
"loss": 0.0537,
"step": 771
},
{
"epoch": 11.55,
"learning_rate": 1.8960781486975143e-05,
"loss": 0.0334,
"step": 774
},
{
"epoch": 11.6,
"learning_rate": 1.860648827576278e-05,
"loss": 0.0651,
"step": 777
},
{
"epoch": 11.64,
"learning_rate": 1.8254778205362206e-05,
"loss": 0.0742,
"step": 780
},
{
"epoch": 11.69,
"learning_rate": 1.7905680215404174e-05,
"loss": 0.1158,
"step": 783
},
{
"epoch": 11.73,
"learning_rate": 1.7559223030590577e-05,
"loss": 0.0916,
"step": 786
},
{
"epoch": 11.78,
"learning_rate": 1.7215435158330855e-05,
"loss": 0.0286,
"step": 789
},
{
"epoch": 11.82,
"learning_rate": 1.6874344886396438e-05,
"loss": 0.0687,
"step": 792
},
{
"epoch": 11.87,
"learning_rate": 1.65359802805931e-05,
"loss": 0.0705,
"step": 795
},
{
"epoch": 11.91,
"learning_rate": 1.6200369182451564e-05,
"loss": 0.0717,
"step": 798
},
{
"epoch": 11.96,
"learning_rate": 1.586753920693676e-05,
"loss": 0.0445,
"step": 801
},
{
"epoch": 12.0,
"learning_rate": 1.553751774017551e-05,
"loss": 0.0673,
"step": 804
},
{
"epoch": 12.0,
"eval_loss": 0.8085535764694214,
"eval_matthews_correlation": 0.5179780196184617,
"eval_runtime": 1.0516,
"eval_samples_per_second": 991.801,
"eval_steps_per_second": 62.76,
"step": 804
},
{
"epoch": 12.04,
"learning_rate": 1.5210331937203088e-05,
"loss": 0.094,
"step": 807
},
{
"epoch": 12.09,
"learning_rate": 1.4886008719728989e-05,
"loss": 0.0523,
"step": 810
},
{
"epoch": 12.13,
"learning_rate": 1.4564574773921514e-05,
"loss": 0.0667,
"step": 813
},
{
"epoch": 12.18,
"learning_rate": 1.4246056548212172e-05,
"loss": 0.0533,
"step": 816
},
{
"epoch": 12.22,
"learning_rate": 1.3930480251119321e-05,
"loss": 0.0358,
"step": 819
},
{
"epoch": 12.27,
"learning_rate": 1.3617871849091657e-05,
"loss": 0.0532,
"step": 822
},
{
"epoch": 12.31,
"learning_rate": 1.3308257064371677e-05,
"loss": 0.0208,
"step": 825
},
{
"epoch": 12.36,
"learning_rate": 1.3001661372879192e-05,
"loss": 0.0506,
"step": 828
},
{
"epoch": 12.4,
"learning_rate": 1.2698110002115004e-05,
"loss": 0.0827,
"step": 831
},
{
"epoch": 12.45,
"learning_rate": 1.2397627929085248e-05,
"loss": 0.0764,
"step": 834
},
{
"epoch": 12.49,
"learning_rate": 1.2100239878246133e-05,
"loss": 0.0863,
"step": 837
},
{
"epoch": 12.54,
"learning_rate": 1.1805970319469589e-05,
"loss": 0.0572,
"step": 840
},
{
"epoch": 12.58,
"learning_rate": 1.1514843466029807e-05,
"loss": 0.103,
"step": 843
},
{
"epoch": 12.63,
"learning_rate": 1.1226883272610877e-05,
"loss": 0.0234,
"step": 846
},
{
"epoch": 12.67,
"learning_rate": 1.0942113433335788e-05,
"loss": 0.0521,
"step": 849
},
{
"epoch": 12.72,
"learning_rate": 1.0660557379816816e-05,
"loss": 0.0459,
"step": 852
},
{
"epoch": 12.76,
"learning_rate": 1.0382238279227419e-05,
"loss": 0.0695,
"step": 855
},
{
"epoch": 12.81,
"learning_rate": 1.0107179032396086e-05,
"loss": 0.076,
"step": 858
},
{
"epoch": 12.85,
"learning_rate": 9.835402271921974e-06,
"loss": 0.0414,
"step": 861
},
{
"epoch": 12.9,
"learning_rate": 9.56693036031256e-06,
"loss": 0.0643,
"step": 864
},
{
"epoch": 12.94,
"learning_rate": 9.301785388143697e-06,
"loss": 0.0413,
"step": 867
},
{
"epoch": 12.99,
"learning_rate": 9.039989172241886e-06,
"loss": 0.0512,
"step": 870
},
{
"epoch": 13.0,
"eval_loss": 0.8409435749053955,
"eval_matthews_correlation": 0.5127766293248668,
"eval_runtime": 1.0686,
"eval_samples_per_second": 976.072,
"eval_steps_per_second": 61.765,
"step": 871
},
{
"epoch": 13.03,
"learning_rate": 8.781563253889164e-06,
"loss": 0.0642,
"step": 873
},
{
"epoch": 13.07,
"learning_rate": 8.52652889705059e-06,
"loss": 0.0699,
"step": 876
},
{
"epoch": 13.12,
"learning_rate": 8.274907086624594e-06,
"loss": 0.0431,
"step": 879
},
{
"epoch": 13.16,
"learning_rate": 8.026718526716342e-06,
"loss": 0.035,
"step": 882
},
{
"epoch": 13.21,
"learning_rate": 7.781983638934092e-06,
"loss": 0.0535,
"step": 885
},
{
"epoch": 13.25,
"learning_rate": 7.5407225607088405e-06,
"loss": 0.0553,
"step": 888
},
{
"epoch": 13.3,
"learning_rate": 7.302955143637419e-06,
"loss": 0.0515,
"step": 891
},
{
"epoch": 13.34,
"learning_rate": 7.068700951849011e-06,
"loss": 0.047,
"step": 894
},
{
"epoch": 13.39,
"learning_rate": 6.837979260395349e-06,
"loss": 0.0787,
"step": 897
},
{
"epoch": 13.43,
"learning_rate": 6.610809053664768e-06,
"loss": 0.0559,
"step": 900
},
{
"epoch": 13.48,
"learning_rate": 6.387209023820073e-06,
"loss": 0.0669,
"step": 903
},
{
"epoch": 13.52,
"learning_rate": 6.1671975692605185e-06,
"loss": 0.0828,
"step": 906
},
{
"epoch": 13.57,
"learning_rate": 5.950792793107934e-06,
"loss": 0.0373,
"step": 909
},
{
"epoch": 13.61,
"learning_rate": 5.738012501717144e-06,
"loss": 0.0586,
"step": 912
},
{
"epoch": 13.66,
"learning_rate": 5.528874203210827e-06,
"loss": 0.034,
"step": 915
},
{
"epoch": 13.7,
"learning_rate": 5.323395106038909e-06,
"loss": 0.0595,
"step": 918
},
{
"epoch": 13.75,
"learning_rate": 5.121592117562573e-06,
"loss": 0.0788,
"step": 921
},
{
"epoch": 13.79,
"learning_rate": 4.923481842663114e-06,
"loss": 0.0449,
"step": 924
},
{
"epoch": 13.84,
"learning_rate": 4.729080582375633e-06,
"loss": 0.0459,
"step": 927
},
{
"epoch": 13.88,
"learning_rate": 4.538404332547719e-06,
"loss": 0.0721,
"step": 930
},
{
"epoch": 13.93,
"learning_rate": 4.351468782523316e-06,
"loss": 0.1029,
"step": 933
},
{
"epoch": 13.97,
"learning_rate": 4.168289313851731e-06,
"loss": 0.0457,
"step": 936
},
{
"epoch": 14.0,
"eval_loss": 0.8759517073631287,
"eval_matthews_correlation": 0.4946640161033145,
"eval_runtime": 1.079,
"eval_samples_per_second": 966.594,
"eval_steps_per_second": 61.165,
"step": 938
},
{
"epoch": 14.01,
"learning_rate": 3.9888809990219985e-06,
"loss": 0.0621,
"step": 939
},
{
"epoch": 14.06,
"learning_rate": 3.8132586002227076e-06,
"loss": 0.0333,
"step": 942
},
{
"epoch": 14.1,
"learning_rate": 3.641436568127271e-06,
"loss": 0.0461,
"step": 945
},
{
"epoch": 14.15,
"learning_rate": 3.4734290407049397e-06,
"loss": 0.0532,
"step": 948
},
{
"epoch": 14.19,
"learning_rate": 3.309249842057499e-06,
"loss": 0.044,
"step": 951
},
{
"epoch": 14.24,
"learning_rate": 3.148912481281713e-06,
"loss": 0.0556,
"step": 954
},
{
"epoch": 14.28,
"learning_rate": 2.99243015135785e-06,
"loss": 0.0417,
"step": 957
},
{
"epoch": 14.33,
"learning_rate": 2.839815728064077e-06,
"loss": 0.0593,
"step": 960
},
{
"epoch": 14.37,
"learning_rate": 2.6910817689169922e-06,
"loss": 0.0653,
"step": 963
},
{
"epoch": 14.42,
"learning_rate": 2.5462405121384423e-06,
"loss": 0.0558,
"step": 966
},
{
"epoch": 14.46,
"learning_rate": 2.405303875648418e-06,
"loss": 0.077,
"step": 969
},
{
"epoch": 14.51,
"learning_rate": 2.268283456084491e-06,
"loss": 0.0763,
"step": 972
},
{
"epoch": 14.55,
"learning_rate": 2.135190527847608e-06,
"loss": 0.075,
"step": 975
},
{
"epoch": 14.6,
"learning_rate": 2.0060360421743584e-06,
"loss": 0.0172,
"step": 978
},
{
"epoch": 14.64,
"learning_rate": 1.8808306262359243e-06,
"loss": 0.034,
"step": 981
},
{
"epoch": 14.69,
"learning_rate": 1.7595845822636293e-06,
"loss": 0.0643,
"step": 984
},
{
"epoch": 14.73,
"learning_rate": 1.6423078867012386e-06,
"loss": 0.0568,
"step": 987
},
{
"epoch": 14.78,
"learning_rate": 1.529010189384078e-06,
"loss": 0.0609,
"step": 990
},
{
"epoch": 14.82,
"learning_rate": 1.419700812745045e-06,
"loss": 0.0454,
"step": 993
},
{
"epoch": 14.87,
"learning_rate": 1.3143887510474629e-06,
"loss": 0.0688,
"step": 996
},
{
"epoch": 14.91,
"learning_rate": 1.2130826696450992e-06,
"loss": 0.0528,
"step": 999
},
{
"epoch": 14.96,
"learning_rate": 1.1157909042690928e-06,
"loss": 0.0491,
"step": 1002
},
{
"epoch": 15.0,
"learning_rate": 1.0225214603420851e-06,
"loss": 0.04,
"step": 1005
},
{
"epoch": 15.0,
"eval_loss": 0.8521906137466431,
"eval_matthews_correlation": 0.5103199460790546,
"eval_runtime": 1.1012,
"eval_samples_per_second": 947.135,
"eval_steps_per_second": 59.934,
"step": 1005
},
{
"epoch": 15.04,
"learning_rate": 9.332820123195418e-07,
"loss": 0.0588,
"step": 1008
},
{
"epoch": 15.09,
"learning_rate": 8.480799030582187e-07,
"loss": 0.0311,
"step": 1011
},
{
"epoch": 15.13,
"learning_rate": 7.669221432120288e-07,
"loss": 0.0655,
"step": 1014
},
{
"epoch": 15.18,
"learning_rate": 6.898154106551702e-07,
"loss": 0.0682,
"step": 1017
},
{
"epoch": 15.22,
"learning_rate": 6.167660499326322e-07,
"loss": 0.0736,
"step": 1020
},
{
"epoch": 15.27,
"learning_rate": 5.477800717381687e-07,
"loss": 0.0778,
"step": 1023
},
{
"epoch": 15.31,
"learning_rate": 4.828631524197325e-07,
"loss": 0.0917,
"step": 1026
},
{
"epoch": 15.36,
"learning_rate": 4.2202063351238774e-07,
"loss": 0.0468,
"step": 1029
},
{
"epoch": 15.4,
"learning_rate": 3.652575212987952e-07,
"loss": 0.0243,
"step": 1032
},
{
"epoch": 15.45,
"learning_rate": 3.1257848639730803e-07,
"loss": 0.0767,
"step": 1035
},
{
"epoch": 15.49,
"learning_rate": 2.6398786337762936e-07,
"loss": 0.0509,
"step": 1038
},
{
"epoch": 15.54,
"learning_rate": 2.1948965040417547e-07,
"loss": 0.0546,
"step": 1041
},
{
"epoch": 15.58,
"learning_rate": 1.790875089070887e-07,
"loss": 0.0451,
"step": 1044
},
{
"epoch": 15.63,
"learning_rate": 1.427847632809509e-07,
"loss": 0.0781,
"step": 1047
},
{
"epoch": 15.67,
"learning_rate": 1.1058440061127439e-07,
"loss": 0.0233,
"step": 1050
},
{
"epoch": 15.72,
"learning_rate": 8.248907042868737e-08,
"loss": 0.0393,
"step": 1053
},
{
"epoch": 15.76,
"learning_rate": 5.850108449094727e-08,
"loss": 0.0612,
"step": 1056
},
{
"epoch": 15.81,
"learning_rate": 3.862241659269294e-08,
"loss": 0.0476,
"step": 1059
},
{
"epoch": 15.85,
"learning_rate": 2.2854702403068972e-08,
"loss": 0.0283,
"step": 1062
},
{
"epoch": 15.9,
"learning_rate": 1.1199239331122214e-08,
"loss": 0.0482,
"step": 1065
},
{
"epoch": 15.94,
"learning_rate": 3.6569864190427738e-09,
"loss": 0.052,
"step": 1068
},
{
"epoch": 15.99,
"learning_rate": 2.2856426326045565e-10,
"loss": 0.0485,
"step": 1071
},
{
"epoch": 16.0,
"eval_loss": 0.8556408882141113,
"eval_matthews_correlation": 0.5076423377649488,
"eval_runtime": 1.1842,
"eval_samples_per_second": 880.772,
"eval_steps_per_second": 55.734,
"step": 1072
},
{
"epoch": 16.0,
"step": 1072,
"total_flos": 2253110839083008.0,
"train_loss": 0.1824943411264763,
"train_runtime": 434.1785,
"train_samples_per_second": 315.115,
"train_steps_per_second": 2.469
}
],
"max_steps": 1072,
"num_train_epochs": 16,
"total_flos": 2253110839083008.0,
"trial_name": null,
"trial_params": null
}