pszemraj commited on
Commit
64ec365
·
1 Parent(s): a3c68db

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 16.0,
3
+ "eval_loss": 0.49737870693206787,
4
+ "eval_matthews_correlation": 0.5395539646127814,
5
+ "eval_runtime": 1.0791,
6
+ "eval_samples": 1043,
7
+ "eval_samples_per_second": 966.502,
8
+ "eval_steps_per_second": 61.159,
9
+ "train_loss": 0.1824943411264763,
10
+ "train_runtime": 434.1785,
11
+ "train_samples": 8551,
12
+ "train_samples_per_second": 315.115,
13
+ "train_steps_per_second": 2.469
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 16.0,
3
+ "eval_loss": 0.49737870693206787,
4
+ "eval_matthews_correlation": 0.5395539646127814,
5
+ "eval_runtime": 1.0791,
6
+ "eval_samples": 1043,
7
+ "eval_samples_per_second": 966.502,
8
+ "eval_steps_per_second": 61.159
9
+ }
predict_results_cola.txt ADDED
@@ -0,0 +1,1064 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index prediction
2
+ 0 acceptable
3
+ 1 acceptable
4
+ 2 acceptable
5
+ 3 acceptable
6
+ 4 unacceptable
7
+ 5 acceptable
8
+ 6 acceptable
9
+ 7 unacceptable
10
+ 8 acceptable
11
+ 9 acceptable
12
+ 10 acceptable
13
+ 11 acceptable
14
+ 12 unacceptable
15
+ 13 acceptable
16
+ 14 acceptable
17
+ 15 acceptable
18
+ 16 unacceptable
19
+ 17 acceptable
20
+ 18 acceptable
21
+ 19 unacceptable
22
+ 20 acceptable
23
+ 21 acceptable
24
+ 22 unacceptable
25
+ 23 acceptable
26
+ 24 acceptable
27
+ 25 unacceptable
28
+ 26 acceptable
29
+ 27 unacceptable
30
+ 28 acceptable
31
+ 29 acceptable
32
+ 30 unacceptable
33
+ 31 acceptable
34
+ 32 unacceptable
35
+ 33 unacceptable
36
+ 34 unacceptable
37
+ 35 unacceptable
38
+ 36 acceptable
39
+ 37 unacceptable
40
+ 38 unacceptable
41
+ 39 unacceptable
42
+ 40 unacceptable
43
+ 41 acceptable
44
+ 42 unacceptable
45
+ 43 acceptable
46
+ 44 acceptable
47
+ 45 unacceptable
48
+ 46 unacceptable
49
+ 47 acceptable
50
+ 48 unacceptable
51
+ 49 acceptable
52
+ 50 unacceptable
53
+ 51 acceptable
54
+ 52 acceptable
55
+ 53 acceptable
56
+ 54 acceptable
57
+ 55 acceptable
58
+ 56 acceptable
59
+ 57 acceptable
60
+ 58 acceptable
61
+ 59 acceptable
62
+ 60 unacceptable
63
+ 61 acceptable
64
+ 62 acceptable
65
+ 63 acceptable
66
+ 64 acceptable
67
+ 65 acceptable
68
+ 66 acceptable
69
+ 67 acceptable
70
+ 68 acceptable
71
+ 69 acceptable
72
+ 70 acceptable
73
+ 71 unacceptable
74
+ 72 unacceptable
75
+ 73 unacceptable
76
+ 74 acceptable
77
+ 75 acceptable
78
+ 76 acceptable
79
+ 77 acceptable
80
+ 78 acceptable
81
+ 79 acceptable
82
+ 80 acceptable
83
+ 81 acceptable
84
+ 82 unacceptable
85
+ 83 acceptable
86
+ 84 acceptable
87
+ 85 acceptable
88
+ 86 acceptable
89
+ 87 acceptable
90
+ 88 acceptable
91
+ 89 acceptable
92
+ 90 acceptable
93
+ 91 acceptable
94
+ 92 unacceptable
95
+ 93 acceptable
96
+ 94 acceptable
97
+ 95 acceptable
98
+ 96 acceptable
99
+ 97 acceptable
100
+ 98 acceptable
101
+ 99 acceptable
102
+ 100 unacceptable
103
+ 101 acceptable
104
+ 102 acceptable
105
+ 103 acceptable
106
+ 104 acceptable
107
+ 105 acceptable
108
+ 106 acceptable
109
+ 107 acceptable
110
+ 108 acceptable
111
+ 109 acceptable
112
+ 110 acceptable
113
+ 111 acceptable
114
+ 112 acceptable
115
+ 113 unacceptable
116
+ 114 acceptable
117
+ 115 unacceptable
118
+ 116 acceptable
119
+ 117 unacceptable
120
+ 118 unacceptable
121
+ 119 acceptable
122
+ 120 unacceptable
123
+ 121 acceptable
124
+ 122 acceptable
125
+ 123 unacceptable
126
+ 124 acceptable
127
+ 125 acceptable
128
+ 126 unacceptable
129
+ 127 acceptable
130
+ 128 acceptable
131
+ 129 acceptable
132
+ 130 unacceptable
133
+ 131 acceptable
134
+ 132 acceptable
135
+ 133 acceptable
136
+ 134 acceptable
137
+ 135 acceptable
138
+ 136 acceptable
139
+ 137 acceptable
140
+ 138 unacceptable
141
+ 139 unacceptable
142
+ 140 unacceptable
143
+ 141 unacceptable
144
+ 142 acceptable
145
+ 143 acceptable
146
+ 144 acceptable
147
+ 145 acceptable
148
+ 146 acceptable
149
+ 147 acceptable
150
+ 148 acceptable
151
+ 149 acceptable
152
+ 150 unacceptable
153
+ 151 acceptable
154
+ 152 acceptable
155
+ 153 acceptable
156
+ 154 acceptable
157
+ 155 acceptable
158
+ 156 unacceptable
159
+ 157 unacceptable
160
+ 158 unacceptable
161
+ 159 acceptable
162
+ 160 acceptable
163
+ 161 acceptable
164
+ 162 acceptable
165
+ 163 acceptable
166
+ 164 acceptable
167
+ 165 unacceptable
168
+ 166 unacceptable
169
+ 167 unacceptable
170
+ 168 acceptable
171
+ 169 acceptable
172
+ 170 acceptable
173
+ 171 acceptable
174
+ 172 acceptable
175
+ 173 acceptable
176
+ 174 acceptable
177
+ 175 acceptable
178
+ 176 acceptable
179
+ 177 acceptable
180
+ 178 acceptable
181
+ 179 acceptable
182
+ 180 acceptable
183
+ 181 acceptable
184
+ 182 unacceptable
185
+ 183 acceptable
186
+ 184 unacceptable
187
+ 185 acceptable
188
+ 186 acceptable
189
+ 187 unacceptable
190
+ 188 unacceptable
191
+ 189 acceptable
192
+ 190 unacceptable
193
+ 191 acceptable
194
+ 192 unacceptable
195
+ 193 acceptable
196
+ 194 unacceptable
197
+ 195 acceptable
198
+ 196 unacceptable
199
+ 197 unacceptable
200
+ 198 acceptable
201
+ 199 acceptable
202
+ 200 acceptable
203
+ 201 unacceptable
204
+ 202 unacceptable
205
+ 203 acceptable
206
+ 204 acceptable
207
+ 205 acceptable
208
+ 206 acceptable
209
+ 207 acceptable
210
+ 208 acceptable
211
+ 209 acceptable
212
+ 210 acceptable
213
+ 211 acceptable
214
+ 212 acceptable
215
+ 213 unacceptable
216
+ 214 acceptable
217
+ 215 acceptable
218
+ 216 acceptable
219
+ 217 acceptable
220
+ 218 acceptable
221
+ 219 unacceptable
222
+ 220 acceptable
223
+ 221 unacceptable
224
+ 222 acceptable
225
+ 223 unacceptable
226
+ 224 unacceptable
227
+ 225 acceptable
228
+ 226 unacceptable
229
+ 227 acceptable
230
+ 228 unacceptable
231
+ 229 acceptable
232
+ 230 acceptable
233
+ 231 acceptable
234
+ 232 acceptable
235
+ 233 acceptable
236
+ 234 acceptable
237
+ 235 acceptable
238
+ 236 unacceptable
239
+ 237 acceptable
240
+ 238 acceptable
241
+ 239 acceptable
242
+ 240 acceptable
243
+ 241 acceptable
244
+ 242 acceptable
245
+ 243 acceptable
246
+ 244 acceptable
247
+ 245 acceptable
248
+ 246 acceptable
249
+ 247 acceptable
250
+ 248 acceptable
251
+ 249 acceptable
252
+ 250 acceptable
253
+ 251 acceptable
254
+ 252 acceptable
255
+ 253 acceptable
256
+ 254 acceptable
257
+ 255 acceptable
258
+ 256 acceptable
259
+ 257 acceptable
260
+ 258 unacceptable
261
+ 259 unacceptable
262
+ 260 unacceptable
263
+ 261 acceptable
264
+ 262 acceptable
265
+ 263 acceptable
266
+ 264 acceptable
267
+ 265 acceptable
268
+ 266 acceptable
269
+ 267 acceptable
270
+ 268 acceptable
271
+ 269 acceptable
272
+ 270 acceptable
273
+ 271 acceptable
274
+ 272 unacceptable
275
+ 273 acceptable
276
+ 274 acceptable
277
+ 275 acceptable
278
+ 276 unacceptable
279
+ 277 unacceptable
280
+ 278 acceptable
281
+ 279 unacceptable
282
+ 280 acceptable
283
+ 281 acceptable
284
+ 282 acceptable
285
+ 283 acceptable
286
+ 284 acceptable
287
+ 285 unacceptable
288
+ 286 unacceptable
289
+ 287 acceptable
290
+ 288 acceptable
291
+ 289 acceptable
292
+ 290 acceptable
293
+ 291 acceptable
294
+ 292 acceptable
295
+ 293 unacceptable
296
+ 294 unacceptable
297
+ 295 acceptable
298
+ 296 acceptable
299
+ 297 acceptable
300
+ 298 acceptable
301
+ 299 acceptable
302
+ 300 acceptable
303
+ 301 unacceptable
304
+ 302 acceptable
305
+ 303 acceptable
306
+ 304 acceptable
307
+ 305 acceptable
308
+ 306 unacceptable
309
+ 307 unacceptable
310
+ 308 acceptable
311
+ 309 acceptable
312
+ 310 acceptable
313
+ 311 acceptable
314
+ 312 acceptable
315
+ 313 acceptable
316
+ 314 unacceptable
317
+ 315 unacceptable
318
+ 316 unacceptable
319
+ 317 unacceptable
320
+ 318 acceptable
321
+ 319 unacceptable
322
+ 320 acceptable
323
+ 321 acceptable
324
+ 322 unacceptable
325
+ 323 acceptable
326
+ 324 acceptable
327
+ 325 acceptable
328
+ 326 acceptable
329
+ 327 acceptable
330
+ 328 acceptable
331
+ 329 acceptable
332
+ 330 acceptable
333
+ 331 acceptable
334
+ 332 acceptable
335
+ 333 acceptable
336
+ 334 acceptable
337
+ 335 acceptable
338
+ 336 unacceptable
339
+ 337 acceptable
340
+ 338 unacceptable
341
+ 339 acceptable
342
+ 340 acceptable
343
+ 341 acceptable
344
+ 342 acceptable
345
+ 343 acceptable
346
+ 344 unacceptable
347
+ 345 acceptable
348
+ 346 acceptable
349
+ 347 acceptable
350
+ 348 acceptable
351
+ 349 acceptable
352
+ 350 acceptable
353
+ 351 unacceptable
354
+ 352 acceptable
355
+ 353 acceptable
356
+ 354 acceptable
357
+ 355 acceptable
358
+ 356 unacceptable
359
+ 357 acceptable
360
+ 358 acceptable
361
+ 359 acceptable
362
+ 360 acceptable
363
+ 361 acceptable
364
+ 362 acceptable
365
+ 363 unacceptable
366
+ 364 acceptable
367
+ 365 acceptable
368
+ 366 acceptable
369
+ 367 acceptable
370
+ 368 acceptable
371
+ 369 acceptable
372
+ 370 unacceptable
373
+ 371 acceptable
374
+ 372 acceptable
375
+ 373 acceptable
376
+ 374 acceptable
377
+ 375 acceptable
378
+ 376 acceptable
379
+ 377 acceptable
380
+ 378 unacceptable
381
+ 379 acceptable
382
+ 380 acceptable
383
+ 381 acceptable
384
+ 382 acceptable
385
+ 383 acceptable
386
+ 384 acceptable
387
+ 385 acceptable
388
+ 386 acceptable
389
+ 387 acceptable
390
+ 388 acceptable
391
+ 389 unacceptable
392
+ 390 acceptable
393
+ 391 unacceptable
394
+ 392 unacceptable
395
+ 393 acceptable
396
+ 394 acceptable
397
+ 395 acceptable
398
+ 396 acceptable
399
+ 397 acceptable
400
+ 398 acceptable
401
+ 399 acceptable
402
+ 400 acceptable
403
+ 401 acceptable
404
+ 402 unacceptable
405
+ 403 acceptable
406
+ 404 acceptable
407
+ 405 acceptable
408
+ 406 acceptable
409
+ 407 acceptable
410
+ 408 acceptable
411
+ 409 acceptable
412
+ 410 acceptable
413
+ 411 unacceptable
414
+ 412 acceptable
415
+ 413 acceptable
416
+ 414 acceptable
417
+ 415 unacceptable
418
+ 416 acceptable
419
+ 417 unacceptable
420
+ 418 unacceptable
421
+ 419 acceptable
422
+ 420 acceptable
423
+ 421 acceptable
424
+ 422 acceptable
425
+ 423 acceptable
426
+ 424 acceptable
427
+ 425 acceptable
428
+ 426 acceptable
429
+ 427 acceptable
430
+ 428 acceptable
431
+ 429 acceptable
432
+ 430 acceptable
433
+ 431 acceptable
434
+ 432 acceptable
435
+ 433 unacceptable
436
+ 434 acceptable
437
+ 435 unacceptable
438
+ 436 acceptable
439
+ 437 acceptable
440
+ 438 acceptable
441
+ 439 acceptable
442
+ 440 acceptable
443
+ 441 acceptable
444
+ 442 acceptable
445
+ 443 acceptable
446
+ 444 unacceptable
447
+ 445 unacceptable
448
+ 446 acceptable
449
+ 447 acceptable
450
+ 448 unacceptable
451
+ 449 unacceptable
452
+ 450 unacceptable
453
+ 451 acceptable
454
+ 452 acceptable
455
+ 453 acceptable
456
+ 454 acceptable
457
+ 455 acceptable
458
+ 456 acceptable
459
+ 457 acceptable
460
+ 458 acceptable
461
+ 459 unacceptable
462
+ 460 unacceptable
463
+ 461 acceptable
464
+ 462 unacceptable
465
+ 463 acceptable
466
+ 464 acceptable
467
+ 465 acceptable
468
+ 466 acceptable
469
+ 467 acceptable
470
+ 468 acceptable
471
+ 469 acceptable
472
+ 470 acceptable
473
+ 471 acceptable
474
+ 472 acceptable
475
+ 473 unacceptable
476
+ 474 acceptable
477
+ 475 unacceptable
478
+ 476 unacceptable
479
+ 477 unacceptable
480
+ 478 acceptable
481
+ 479 unacceptable
482
+ 480 acceptable
483
+ 481 acceptable
484
+ 482 acceptable
485
+ 483 unacceptable
486
+ 484 unacceptable
487
+ 485 acceptable
488
+ 486 unacceptable
489
+ 487 acceptable
490
+ 488 unacceptable
491
+ 489 acceptable
492
+ 490 acceptable
493
+ 491 unacceptable
494
+ 492 acceptable
495
+ 493 acceptable
496
+ 494 acceptable
497
+ 495 unacceptable
498
+ 496 unacceptable
499
+ 497 acceptable
500
+ 498 acceptable
501
+ 499 acceptable
502
+ 500 acceptable
503
+ 501 acceptable
504
+ 502 acceptable
505
+ 503 acceptable
506
+ 504 unacceptable
507
+ 505 acceptable
508
+ 506 acceptable
509
+ 507 acceptable
510
+ 508 unacceptable
511
+ 509 acceptable
512
+ 510 acceptable
513
+ 511 unacceptable
514
+ 512 acceptable
515
+ 513 acceptable
516
+ 514 acceptable
517
+ 515 acceptable
518
+ 516 acceptable
519
+ 517 unacceptable
520
+ 518 acceptable
521
+ 519 acceptable
522
+ 520 acceptable
523
+ 521 unacceptable
524
+ 522 unacceptable
525
+ 523 acceptable
526
+ 524 unacceptable
527
+ 525 acceptable
528
+ 526 acceptable
529
+ 527 acceptable
530
+ 528 acceptable
531
+ 529 acceptable
532
+ 530 acceptable
533
+ 531 unacceptable
534
+ 532 acceptable
535
+ 533 acceptable
536
+ 534 acceptable
537
+ 535 acceptable
538
+ 536 acceptable
539
+ 537 acceptable
540
+ 538 acceptable
541
+ 539 acceptable
542
+ 540 acceptable
543
+ 541 acceptable
544
+ 542 acceptable
545
+ 543 acceptable
546
+ 544 acceptable
547
+ 545 acceptable
548
+ 546 acceptable
549
+ 547 acceptable
550
+ 548 acceptable
551
+ 549 acceptable
552
+ 550 acceptable
553
+ 551 acceptable
554
+ 552 acceptable
555
+ 553 acceptable
556
+ 554 acceptable
557
+ 555 acceptable
558
+ 556 acceptable
559
+ 557 acceptable
560
+ 558 acceptable
561
+ 559 acceptable
562
+ 560 acceptable
563
+ 561 acceptable
564
+ 562 acceptable
565
+ 563 acceptable
566
+ 564 acceptable
567
+ 565 acceptable
568
+ 566 acceptable
569
+ 567 acceptable
570
+ 568 acceptable
571
+ 569 acceptable
572
+ 570 unacceptable
573
+ 571 unacceptable
574
+ 572 acceptable
575
+ 573 acceptable
576
+ 574 acceptable
577
+ 575 acceptable
578
+ 576 acceptable
579
+ 577 acceptable
580
+ 578 acceptable
581
+ 579 acceptable
582
+ 580 acceptable
583
+ 581 acceptable
584
+ 582 acceptable
585
+ 583 acceptable
586
+ 584 acceptable
587
+ 585 acceptable
588
+ 586 acceptable
589
+ 587 acceptable
590
+ 588 acceptable
591
+ 589 acceptable
592
+ 590 acceptable
593
+ 591 acceptable
594
+ 592 unacceptable
595
+ 593 acceptable
596
+ 594 acceptable
597
+ 595 unacceptable
598
+ 596 acceptable
599
+ 597 acceptable
600
+ 598 unacceptable
601
+ 599 acceptable
602
+ 600 acceptable
603
+ 601 acceptable
604
+ 602 acceptable
605
+ 603 acceptable
606
+ 604 unacceptable
607
+ 605 unacceptable
608
+ 606 unacceptable
609
+ 607 acceptable
610
+ 608 acceptable
611
+ 609 acceptable
612
+ 610 acceptable
613
+ 611 acceptable
614
+ 612 acceptable
615
+ 613 acceptable
616
+ 614 unacceptable
617
+ 615 unacceptable
618
+ 616 unacceptable
619
+ 617 unacceptable
620
+ 618 acceptable
621
+ 619 acceptable
622
+ 620 acceptable
623
+ 621 unacceptable
624
+ 622 acceptable
625
+ 623 acceptable
626
+ 624 unacceptable
627
+ 625 acceptable
628
+ 626 acceptable
629
+ 627 acceptable
630
+ 628 acceptable
631
+ 629 unacceptable
632
+ 630 unacceptable
633
+ 631 acceptable
634
+ 632 acceptable
635
+ 633 acceptable
636
+ 634 acceptable
637
+ 635 unacceptable
638
+ 636 acceptable
639
+ 637 unacceptable
640
+ 638 acceptable
641
+ 639 acceptable
642
+ 640 unacceptable
643
+ 641 acceptable
644
+ 642 unacceptable
645
+ 643 acceptable
646
+ 644 acceptable
647
+ 645 acceptable
648
+ 646 acceptable
649
+ 647 acceptable
650
+ 648 unacceptable
651
+ 649 acceptable
652
+ 650 unacceptable
653
+ 651 acceptable
654
+ 652 acceptable
655
+ 653 acceptable
656
+ 654 acceptable
657
+ 655 acceptable
658
+ 656 unacceptable
659
+ 657 acceptable
660
+ 658 acceptable
661
+ 659 acceptable
662
+ 660 acceptable
663
+ 661 acceptable
664
+ 662 acceptable
665
+ 663 acceptable
666
+ 664 acceptable
667
+ 665 acceptable
668
+ 666 acceptable
669
+ 667 acceptable
670
+ 668 acceptable
671
+ 669 acceptable
672
+ 670 acceptable
673
+ 671 acceptable
674
+ 672 acceptable
675
+ 673 acceptable
676
+ 674 acceptable
677
+ 675 acceptable
678
+ 676 acceptable
679
+ 677 acceptable
680
+ 678 acceptable
681
+ 679 acceptable
682
+ 680 acceptable
683
+ 681 acceptable
684
+ 682 acceptable
685
+ 683 acceptable
686
+ 684 acceptable
687
+ 685 acceptable
688
+ 686 acceptable
689
+ 687 acceptable
690
+ 688 acceptable
691
+ 689 unacceptable
692
+ 690 acceptable
693
+ 691 acceptable
694
+ 692 acceptable
695
+ 693 acceptable
696
+ 694 unacceptable
697
+ 695 unacceptable
698
+ 696 acceptable
699
+ 697 acceptable
700
+ 698 acceptable
701
+ 699 unacceptable
702
+ 700 acceptable
703
+ 701 unacceptable
704
+ 702 acceptable
705
+ 703 acceptable
706
+ 704 unacceptable
707
+ 705 unacceptable
708
+ 706 unacceptable
709
+ 707 unacceptable
710
+ 708 acceptable
711
+ 709 acceptable
712
+ 710 unacceptable
713
+ 711 acceptable
714
+ 712 acceptable
715
+ 713 acceptable
716
+ 714 acceptable
717
+ 715 unacceptable
718
+ 716 unacceptable
719
+ 717 unacceptable
720
+ 718 unacceptable
721
+ 719 unacceptable
722
+ 720 acceptable
723
+ 721 acceptable
724
+ 722 unacceptable
725
+ 723 unacceptable
726
+ 724 acceptable
727
+ 725 acceptable
728
+ 726 unacceptable
729
+ 727 unacceptable
730
+ 728 acceptable
731
+ 729 acceptable
732
+ 730 acceptable
733
+ 731 unacceptable
734
+ 732 acceptable
735
+ 733 acceptable
736
+ 734 unacceptable
737
+ 735 acceptable
738
+ 736 acceptable
739
+ 737 acceptable
740
+ 738 acceptable
741
+ 739 acceptable
742
+ 740 unacceptable
743
+ 741 unacceptable
744
+ 742 acceptable
745
+ 743 acceptable
746
+ 744 unacceptable
747
+ 745 acceptable
748
+ 746 acceptable
749
+ 747 unacceptable
750
+ 748 acceptable
751
+ 749 acceptable
752
+ 750 acceptable
753
+ 751 acceptable
754
+ 752 acceptable
755
+ 753 acceptable
756
+ 754 acceptable
757
+ 755 unacceptable
758
+ 756 unacceptable
759
+ 757 acceptable
760
+ 758 acceptable
761
+ 759 acceptable
762
+ 760 acceptable
763
+ 761 acceptable
764
+ 762 acceptable
765
+ 763 acceptable
766
+ 764 acceptable
767
+ 765 unacceptable
768
+ 766 acceptable
769
+ 767 acceptable
770
+ 768 acceptable
771
+ 769 acceptable
772
+ 770 acceptable
773
+ 771 acceptable
774
+ 772 acceptable
775
+ 773 acceptable
776
+ 774 acceptable
777
+ 775 unacceptable
778
+ 776 unacceptable
779
+ 777 acceptable
780
+ 778 acceptable
781
+ 779 acceptable
782
+ 780 acceptable
783
+ 781 acceptable
784
+ 782 acceptable
785
+ 783 acceptable
786
+ 784 acceptable
787
+ 785 acceptable
788
+ 786 unacceptable
789
+ 787 acceptable
790
+ 788 acceptable
791
+ 789 acceptable
792
+ 790 acceptable
793
+ 791 acceptable
794
+ 792 unacceptable
795
+ 793 acceptable
796
+ 794 acceptable
797
+ 795 acceptable
798
+ 796 unacceptable
799
+ 797 acceptable
800
+ 798 unacceptable
801
+ 799 unacceptable
802
+ 800 acceptable
803
+ 801 acceptable
804
+ 802 acceptable
805
+ 803 acceptable
806
+ 804 unacceptable
807
+ 805 acceptable
808
+ 806 acceptable
809
+ 807 unacceptable
810
+ 808 unacceptable
811
+ 809 acceptable
812
+ 810 acceptable
813
+ 811 acceptable
814
+ 812 acceptable
815
+ 813 acceptable
816
+ 814 acceptable
817
+ 815 unacceptable
818
+ 816 acceptable
819
+ 817 acceptable
820
+ 818 acceptable
821
+ 819 acceptable
822
+ 820 acceptable
823
+ 821 acceptable
824
+ 822 acceptable
825
+ 823 unacceptable
826
+ 824 acceptable
827
+ 825 unacceptable
828
+ 826 acceptable
829
+ 827 acceptable
830
+ 828 acceptable
831
+ 829 acceptable
832
+ 830 acceptable
833
+ 831 unacceptable
834
+ 832 unacceptable
835
+ 833 acceptable
836
+ 834 acceptable
837
+ 835 acceptable
838
+ 836 acceptable
839
+ 837 acceptable
840
+ 838 acceptable
841
+ 839 unacceptable
842
+ 840 acceptable
843
+ 841 unacceptable
844
+ 842 acceptable
845
+ 843 acceptable
846
+ 844 acceptable
847
+ 845 acceptable
848
+ 846 unacceptable
849
+ 847 acceptable
850
+ 848 acceptable
851
+ 849 acceptable
852
+ 850 acceptable
853
+ 851 unacceptable
854
+ 852 acceptable
855
+ 853 unacceptable
856
+ 854 acceptable
857
+ 855 acceptable
858
+ 856 acceptable
859
+ 857 acceptable
860
+ 858 unacceptable
861
+ 859 acceptable
862
+ 860 acceptable
863
+ 861 acceptable
864
+ 862 acceptable
865
+ 863 acceptable
866
+ 864 acceptable
867
+ 865 acceptable
868
+ 866 acceptable
869
+ 867 acceptable
870
+ 868 acceptable
871
+ 869 acceptable
872
+ 870 acceptable
873
+ 871 acceptable
874
+ 872 unacceptable
875
+ 873 unacceptable
876
+ 874 acceptable
877
+ 875 acceptable
878
+ 876 acceptable
879
+ 877 acceptable
880
+ 878 acceptable
881
+ 879 acceptable
882
+ 880 acceptable
883
+ 881 acceptable
884
+ 882 acceptable
885
+ 883 acceptable
886
+ 884 acceptable
887
+ 885 acceptable
888
+ 886 unacceptable
889
+ 887 unacceptable
890
+ 888 unacceptable
891
+ 889 unacceptable
892
+ 890 unacceptable
893
+ 891 unacceptable
894
+ 892 unacceptable
895
+ 893 acceptable
896
+ 894 acceptable
897
+ 895 acceptable
898
+ 896 acceptable
899
+ 897 unacceptable
900
+ 898 unacceptable
901
+ 899 acceptable
902
+ 900 acceptable
903
+ 901 acceptable
904
+ 902 acceptable
905
+ 903 acceptable
906
+ 904 unacceptable
907
+ 905 acceptable
908
+ 906 acceptable
909
+ 907 acceptable
910
+ 908 acceptable
911
+ 909 acceptable
912
+ 910 acceptable
913
+ 911 acceptable
914
+ 912 acceptable
915
+ 913 acceptable
916
+ 914 acceptable
917
+ 915 acceptable
918
+ 916 acceptable
919
+ 917 unacceptable
920
+ 918 acceptable
921
+ 919 acceptable
922
+ 920 acceptable
923
+ 921 acceptable
924
+ 922 acceptable
925
+ 923 acceptable
926
+ 924 acceptable
927
+ 925 acceptable
928
+ 926 acceptable
929
+ 927 acceptable
930
+ 928 acceptable
931
+ 929 unacceptable
932
+ 930 unacceptable
933
+ 931 unacceptable
934
+ 932 unacceptable
935
+ 933 unacceptable
936
+ 934 acceptable
937
+ 935 acceptable
938
+ 936 acceptable
939
+ 937 unacceptable
940
+ 938 unacceptable
941
+ 939 unacceptable
942
+ 940 acceptable
943
+ 941 acceptable
944
+ 942 acceptable
945
+ 943 acceptable
946
+ 944 acceptable
947
+ 945 acceptable
948
+ 946 acceptable
949
+ 947 acceptable
950
+ 948 unacceptable
951
+ 949 unacceptable
952
+ 950 acceptable
953
+ 951 acceptable
954
+ 952 unacceptable
955
+ 953 unacceptable
956
+ 954 acceptable
957
+ 955 unacceptable
958
+ 956 acceptable
959
+ 957 acceptable
960
+ 958 acceptable
961
+ 959 acceptable
962
+ 960 acceptable
963
+ 961 acceptable
964
+ 962 acceptable
965
+ 963 acceptable
966
+ 964 acceptable
967
+ 965 acceptable
968
+ 966 acceptable
969
+ 967 acceptable
970
+ 968 acceptable
971
+ 969 unacceptable
972
+ 970 acceptable
973
+ 971 acceptable
974
+ 972 acceptable
975
+ 973 acceptable
976
+ 974 acceptable
977
+ 975 acceptable
978
+ 976 acceptable
979
+ 977 acceptable
980
+ 978 acceptable
981
+ 979 acceptable
982
+ 980 acceptable
983
+ 981 acceptable
984
+ 982 unacceptable
985
+ 983 acceptable
986
+ 984 acceptable
987
+ 985 acceptable
988
+ 986 acceptable
989
+ 987 acceptable
990
+ 988 acceptable
991
+ 989 acceptable
992
+ 990 acceptable
993
+ 991 acceptable
994
+ 992 acceptable
995
+ 993 unacceptable
996
+ 994 acceptable
997
+ 995 acceptable
998
+ 996 acceptable
999
+ 997 acceptable
1000
+ 998 acceptable
1001
+ 999 unacceptable
1002
+ 1000 acceptable
1003
+ 1001 acceptable
1004
+ 1002 acceptable
1005
+ 1003 acceptable
1006
+ 1004 unacceptable
1007
+ 1005 unacceptable
1008
+ 1006 acceptable
1009
+ 1007 acceptable
1010
+ 1008 acceptable
1011
+ 1009 acceptable
1012
+ 1010 acceptable
1013
+ 1011 acceptable
1014
+ 1012 unacceptable
1015
+ 1013 acceptable
1016
+ 1014 acceptable
1017
+ 1015 acceptable
1018
+ 1016 acceptable
1019
+ 1017 acceptable
1020
+ 1018 acceptable
1021
+ 1019 acceptable
1022
+ 1020 unacceptable
1023
+ 1021 acceptable
1024
+ 1022 acceptable
1025
+ 1023 unacceptable
1026
+ 1024 acceptable
1027
+ 1025 acceptable
1028
+ 1026 acceptable
1029
+ 1027 acceptable
1030
+ 1028 acceptable
1031
+ 1029 acceptable
1032
+ 1030 acceptable
1033
+ 1031 acceptable
1034
+ 1032 acceptable
1035
+ 1033 acceptable
1036
+ 1034 unacceptable
1037
+ 1035 unacceptable
1038
+ 1036 acceptable
1039
+ 1037 acceptable
1040
+ 1038 acceptable
1041
+ 1039 acceptable
1042
+ 1040 unacceptable
1043
+ 1041 unacceptable
1044
+ 1042 acceptable
1045
+ 1043 acceptable
1046
+ 1044 acceptable
1047
+ 1045 acceptable
1048
+ 1046 acceptable
1049
+ 1047 acceptable
1050
+ 1048 acceptable
1051
+ 1049 acceptable
1052
+ 1050 unacceptable
1053
+ 1051 unacceptable
1054
+ 1052 unacceptable
1055
+ 1053 unacceptable
1056
+ 1054 acceptable
1057
+ 1055 unacceptable
1058
+ 1056 acceptable
1059
+ 1057 acceptable
1060
+ 1058 acceptable
1061
+ 1059 unacceptable
1062
+ 1060 unacceptable
1063
+ 1061 acceptable
1064
+ 1062 acceptable
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 16.0,
3
+ "train_loss": 0.1824943411264763,
4
+ "train_runtime": 434.1785,
5
+ "train_samples": 8551,
6
+ "train_samples_per_second": 315.115,
7
+ "train_steps_per_second": 2.469
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,2311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5395539646127814,
3
+ "best_model_checkpoint": "./runtime-text-classification/xtremedistil-l12-h384-uncased-CoLA/checkpoint-268",
4
+ "epoch": 16.0,
5
+ "global_step": 1072,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04,
12
+ "learning_rate": 9.090909090909091e-06,
13
+ "loss": 0.6853,
14
+ "step": 3
15
+ },
16
+ {
17
+ "epoch": 0.09,
18
+ "learning_rate": 1.8181818181818182e-05,
19
+ "loss": 0.6765,
20
+ "step": 6
21
+ },
22
+ {
23
+ "epoch": 0.13,
24
+ "learning_rate": 2.7272727272727273e-05,
25
+ "loss": 0.6581,
26
+ "step": 9
27
+ },
28
+ {
29
+ "epoch": 0.18,
30
+ "learning_rate": 3.6363636363636364e-05,
31
+ "loss": 0.6444,
32
+ "step": 12
33
+ },
34
+ {
35
+ "epoch": 0.22,
36
+ "learning_rate": 4.545454545454546e-05,
37
+ "loss": 0.6218,
38
+ "step": 15
39
+ },
40
+ {
41
+ "epoch": 0.27,
42
+ "learning_rate": 5.4545454545454546e-05,
43
+ "loss": 0.6467,
44
+ "step": 18
45
+ },
46
+ {
47
+ "epoch": 0.31,
48
+ "learning_rate": 6.363636363636364e-05,
49
+ "loss": 0.594,
50
+ "step": 21
51
+ },
52
+ {
53
+ "epoch": 0.36,
54
+ "learning_rate": 7.272727272727273e-05,
55
+ "loss": 0.6193,
56
+ "step": 24
57
+ },
58
+ {
59
+ "epoch": 0.4,
60
+ "learning_rate": 8.181818181818183e-05,
61
+ "loss": 0.6009,
62
+ "step": 27
63
+ },
64
+ {
65
+ "epoch": 0.45,
66
+ "learning_rate": 9.090909090909092e-05,
67
+ "loss": 0.5942,
68
+ "step": 30
69
+ },
70
+ {
71
+ "epoch": 0.49,
72
+ "learning_rate": 0.0001,
73
+ "loss": 0.5839,
74
+ "step": 33
75
+ },
76
+ {
77
+ "epoch": 0.54,
78
+ "learning_rate": 9.999794293416862e-05,
79
+ "loss": 0.5991,
80
+ "step": 36
81
+ },
82
+ {
83
+ "epoch": 0.58,
84
+ "learning_rate": 9.999177190593525e-05,
85
+ "loss": 0.5808,
86
+ "step": 39
87
+ },
88
+ {
89
+ "epoch": 0.63,
90
+ "learning_rate": 9.998148742306837e-05,
91
+ "loss": 0.5599,
92
+ "step": 42
93
+ },
94
+ {
95
+ "epoch": 0.67,
96
+ "learning_rate": 9.996709033180229e-05,
97
+ "loss": 0.5859,
98
+ "step": 45
99
+ },
100
+ {
101
+ "epoch": 0.72,
102
+ "learning_rate": 9.99485818167676e-05,
103
+ "loss": 0.5573,
104
+ "step": 48
105
+ },
106
+ {
107
+ "epoch": 0.76,
108
+ "learning_rate": 9.992596340089365e-05,
109
+ "loss": 0.5645,
110
+ "step": 51
111
+ },
112
+ {
113
+ "epoch": 0.81,
114
+ "learning_rate": 9.989923694528327e-05,
115
+ "loss": 0.513,
116
+ "step": 54
117
+ },
118
+ {
119
+ "epoch": 0.85,
120
+ "learning_rate": 9.986840464905958e-05,
121
+ "loss": 0.5347,
122
+ "step": 57
123
+ },
124
+ {
125
+ "epoch": 0.9,
126
+ "learning_rate": 9.983346904918513e-05,
127
+ "loss": 0.5333,
128
+ "step": 60
129
+ },
130
+ {
131
+ "epoch": 0.94,
132
+ "learning_rate": 9.979443302025305e-05,
133
+ "loss": 0.5549,
134
+ "step": 63
135
+ },
136
+ {
137
+ "epoch": 0.99,
138
+ "learning_rate": 9.975129977425062e-05,
139
+ "loss": 0.4822,
140
+ "step": 66
141
+ },
142
+ {
143
+ "epoch": 1.0,
144
+ "eval_loss": 0.5893396139144897,
145
+ "eval_matthews_correlation": 0.26208039166807634,
146
+ "eval_runtime": 1.0615,
147
+ "eval_samples_per_second": 982.602,
148
+ "eval_steps_per_second": 62.178,
149
+ "step": 67
150
+ },
151
+ {
152
+ "epoch": 1.03,
153
+ "learning_rate": 9.970407286029487e-05,
154
+ "loss": 0.4866,
155
+ "step": 69
156
+ },
157
+ {
158
+ "epoch": 1.07,
159
+ "learning_rate": 9.965275616434067e-05,
160
+ "loss": 0.4771,
161
+ "step": 72
162
+ },
163
+ {
164
+ "epoch": 1.12,
165
+ "learning_rate": 9.959735390886086e-05,
166
+ "loss": 0.506,
167
+ "step": 75
168
+ },
169
+ {
170
+ "epoch": 1.16,
171
+ "learning_rate": 9.953787065249896e-05,
172
+ "loss": 0.5117,
173
+ "step": 78
174
+ },
175
+ {
176
+ "epoch": 1.21,
177
+ "learning_rate": 9.947431128969387e-05,
178
+ "loss": 0.4754,
179
+ "step": 81
180
+ },
181
+ {
182
+ "epoch": 1.25,
183
+ "learning_rate": 9.940668105027739e-05,
184
+ "loss": 0.5221,
185
+ "step": 84
186
+ },
187
+ {
188
+ "epoch": 1.3,
189
+ "learning_rate": 9.933498549904368e-05,
190
+ "loss": 0.4806,
191
+ "step": 87
192
+ },
193
+ {
194
+ "epoch": 1.34,
195
+ "learning_rate": 9.925923053529147e-05,
196
+ "loss": 0.4426,
197
+ "step": 90
198
+ },
199
+ {
200
+ "epoch": 1.39,
201
+ "learning_rate": 9.917942239233871e-05,
202
+ "loss": 0.5009,
203
+ "step": 93
204
+ },
205
+ {
206
+ "epoch": 1.43,
207
+ "learning_rate": 9.909556763700951e-05,
208
+ "loss": 0.4601,
209
+ "step": 96
210
+ },
211
+ {
212
+ "epoch": 1.48,
213
+ "learning_rate": 9.900767316909396e-05,
214
+ "loss": 0.4366,
215
+ "step": 99
216
+ },
217
+ {
218
+ "epoch": 1.52,
219
+ "learning_rate": 9.891574622078034e-05,
220
+ "loss": 0.4403,
221
+ "step": 102
222
+ },
223
+ {
224
+ "epoch": 1.57,
225
+ "learning_rate": 9.881979435606003e-05,
226
+ "loss": 0.3992,
227
+ "step": 105
228
+ },
229
+ {
230
+ "epoch": 1.61,
231
+ "learning_rate": 9.871982547010513e-05,
232
+ "loss": 0.464,
233
+ "step": 108
234
+ },
235
+ {
236
+ "epoch": 1.66,
237
+ "learning_rate": 9.861584778861878e-05,
238
+ "loss": 0.4349,
239
+ "step": 111
240
+ },
241
+ {
242
+ "epoch": 1.7,
243
+ "learning_rate": 9.850786986715846e-05,
244
+ "loss": 0.4249,
245
+ "step": 114
246
+ },
247
+ {
248
+ "epoch": 1.75,
249
+ "learning_rate": 9.839590059043184e-05,
250
+ "loss": 0.4344,
251
+ "step": 117
252
+ },
253
+ {
254
+ "epoch": 1.79,
255
+ "learning_rate": 9.827994917156587e-05,
256
+ "loss": 0.4342,
257
+ "step": 120
258
+ },
259
+ {
260
+ "epoch": 1.84,
261
+ "learning_rate": 9.816002515134865e-05,
262
+ "loss": 0.4304,
263
+ "step": 123
264
+ },
265
+ {
266
+ "epoch": 1.88,
267
+ "learning_rate": 9.80361383974443e-05,
268
+ "loss": 0.4584,
269
+ "step": 126
270
+ },
271
+ {
272
+ "epoch": 1.93,
273
+ "learning_rate": 9.790829910358122e-05,
274
+ "loss": 0.4151,
275
+ "step": 129
276
+ },
277
+ {
278
+ "epoch": 1.97,
279
+ "learning_rate": 9.777651778871309e-05,
280
+ "loss": 0.4669,
281
+ "step": 132
282
+ },
283
+ {
284
+ "epoch": 2.0,
285
+ "eval_loss": 0.5811270475387573,
286
+ "eval_matthews_correlation": 0.3721681506432673,
287
+ "eval_runtime": 1.0734,
288
+ "eval_samples_per_second": 971.718,
289
+ "eval_steps_per_second": 61.489,
290
+ "step": 134
291
+ },
292
+ {
293
+ "epoch": 2.01,
294
+ "learning_rate": 9.764080529615351e-05,
295
+ "loss": 0.4085,
296
+ "step": 135
297
+ },
298
+ {
299
+ "epoch": 2.06,
300
+ "learning_rate": 9.750117279268378e-05,
301
+ "loss": 0.3755,
302
+ "step": 138
303
+ },
304
+ {
305
+ "epoch": 2.1,
306
+ "learning_rate": 9.735763176763394e-05,
307
+ "loss": 0.3877,
308
+ "step": 141
309
+ },
310
+ {
311
+ "epoch": 2.15,
312
+ "learning_rate": 9.721019403193753e-05,
313
+ "loss": 0.3769,
314
+ "step": 144
315
+ },
316
+ {
317
+ "epoch": 2.19,
318
+ "learning_rate": 9.705887171715966e-05,
319
+ "loss": 0.3468,
320
+ "step": 147
321
+ },
322
+ {
323
+ "epoch": 2.24,
324
+ "learning_rate": 9.690367727449888e-05,
325
+ "loss": 0.3736,
326
+ "step": 150
327
+ },
328
+ {
329
+ "epoch": 2.28,
330
+ "learning_rate": 9.674462347376259e-05,
331
+ "loss": 0.3744,
332
+ "step": 153
333
+ },
334
+ {
335
+ "epoch": 2.33,
336
+ "learning_rate": 9.658172340231635e-05,
337
+ "loss": 0.3203,
338
+ "step": 156
339
+ },
340
+ {
341
+ "epoch": 2.37,
342
+ "learning_rate": 9.6414990464007e-05,
343
+ "loss": 0.3398,
344
+ "step": 159
345
+ },
346
+ {
347
+ "epoch": 2.42,
348
+ "learning_rate": 9.624443837805972e-05,
349
+ "loss": 0.3255,
350
+ "step": 162
351
+ },
352
+ {
353
+ "epoch": 2.46,
354
+ "learning_rate": 9.607008117794928e-05,
355
+ "loss": 0.349,
356
+ "step": 165
357
+ },
358
+ {
359
+ "epoch": 2.51,
360
+ "learning_rate": 9.589193321024524e-05,
361
+ "loss": 0.4708,
362
+ "step": 168
363
+ },
364
+ {
365
+ "epoch": 2.55,
366
+ "learning_rate": 9.571000913343148e-05,
367
+ "loss": 0.3565,
368
+ "step": 171
369
+ },
370
+ {
371
+ "epoch": 2.6,
372
+ "learning_rate": 9.552432391670009e-05,
373
+ "loss": 0.3502,
374
+ "step": 174
375
+ },
376
+ {
377
+ "epoch": 2.64,
378
+ "learning_rate": 9.533489283871966e-05,
379
+ "loss": 0.3605,
380
+ "step": 177
381
+ },
382
+ {
383
+ "epoch": 2.69,
384
+ "learning_rate": 9.51417314863781e-05,
385
+ "loss": 0.4047,
386
+ "step": 180
387
+ },
388
+ {
389
+ "epoch": 2.73,
390
+ "learning_rate": 9.494485575350014e-05,
391
+ "loss": 0.3681,
392
+ "step": 183
393
+ },
394
+ {
395
+ "epoch": 2.78,
396
+ "learning_rate": 9.474428183953951e-05,
397
+ "loss": 0.4035,
398
+ "step": 186
399
+ },
400
+ {
401
+ "epoch": 2.82,
402
+ "learning_rate": 9.454002624824598e-05,
403
+ "loss": 0.3704,
404
+ "step": 189
405
+ },
406
+ {
407
+ "epoch": 2.87,
408
+ "learning_rate": 9.433210578630749e-05,
409
+ "loss": 0.3648,
410
+ "step": 192
411
+ },
412
+ {
413
+ "epoch": 2.91,
414
+ "learning_rate": 9.412053756196715e-05,
415
+ "loss": 0.3498,
416
+ "step": 195
417
+ },
418
+ {
419
+ "epoch": 2.96,
420
+ "learning_rate": 9.390533898361555e-05,
421
+ "loss": 0.3759,
422
+ "step": 198
423
+ },
424
+ {
425
+ "epoch": 3.0,
426
+ "learning_rate": 9.368652775835843e-05,
427
+ "loss": 0.3077,
428
+ "step": 201
429
+ },
430
+ {
431
+ "epoch": 3.0,
432
+ "eval_loss": 0.615044891834259,
433
+ "eval_matthews_correlation": 0.4382589402952836,
434
+ "eval_runtime": 1.0332,
435
+ "eval_samples_per_second": 1009.479,
436
+ "eval_steps_per_second": 63.879,
437
+ "step": 201
438
+ },
439
+ {
440
+ "epoch": 3.04,
441
+ "learning_rate": 9.346412189055955e-05,
442
+ "loss": 0.2746,
443
+ "step": 204
444
+ },
445
+ {
446
+ "epoch": 3.09,
447
+ "learning_rate": 9.323813968035936e-05,
448
+ "loss": 0.242,
449
+ "step": 207
450
+ },
451
+ {
452
+ "epoch": 3.13,
453
+ "learning_rate": 9.300859972216924e-05,
454
+ "loss": 0.2428,
455
+ "step": 210
456
+ },
457
+ {
458
+ "epoch": 3.18,
459
+ "learning_rate": 9.277552090314135e-05,
460
+ "loss": 0.337,
461
+ "step": 213
462
+ },
463
+ {
464
+ "epoch": 3.22,
465
+ "learning_rate": 9.253892240161466e-05,
466
+ "loss": 0.2882,
467
+ "step": 216
468
+ },
469
+ {
470
+ "epoch": 3.27,
471
+ "learning_rate": 9.229882368553692e-05,
472
+ "loss": 0.2743,
473
+ "step": 219
474
+ },
475
+ {
476
+ "epoch": 3.31,
477
+ "learning_rate": 9.205524451086274e-05,
478
+ "loss": 0.2693,
479
+ "step": 222
480
+ },
481
+ {
482
+ "epoch": 3.36,
483
+ "learning_rate": 9.180820491992799e-05,
484
+ "loss": 0.2557,
485
+ "step": 225
486
+ },
487
+ {
488
+ "epoch": 3.4,
489
+ "learning_rate": 9.155772523980075e-05,
490
+ "loss": 0.296,
491
+ "step": 228
492
+ },
493
+ {
494
+ "epoch": 3.45,
495
+ "learning_rate": 9.130382608060868e-05,
496
+ "loss": 0.2739,
497
+ "step": 231
498
+ },
499
+ {
500
+ "epoch": 3.49,
501
+ "learning_rate": 9.104652833384317e-05,
502
+ "loss": 0.2848,
503
+ "step": 234
504
+ },
505
+ {
506
+ "epoch": 3.54,
507
+ "learning_rate": 9.078585317064036e-05,
508
+ "loss": 0.2613,
509
+ "step": 237
510
+ },
511
+ {
512
+ "epoch": 3.58,
513
+ "learning_rate": 9.052182204003909e-05,
514
+ "loss": 0.2775,
515
+ "step": 240
516
+ },
517
+ {
518
+ "epoch": 3.63,
519
+ "learning_rate": 9.025445666721608e-05,
520
+ "loss": 0.2558,
521
+ "step": 243
522
+ },
523
+ {
524
+ "epoch": 3.67,
525
+ "learning_rate": 8.998377905169822e-05,
526
+ "loss": 0.3344,
527
+ "step": 246
528
+ },
529
+ {
530
+ "epoch": 3.72,
531
+ "learning_rate": 8.970981146555247e-05,
532
+ "loss": 0.3003,
533
+ "step": 249
534
+ },
535
+ {
536
+ "epoch": 3.76,
537
+ "learning_rate": 8.943257645155327e-05,
538
+ "loss": 0.2598,
539
+ "step": 252
540
+ },
541
+ {
542
+ "epoch": 3.81,
543
+ "learning_rate": 8.91520968213276e-05,
544
+ "loss": 0.2584,
545
+ "step": 255
546
+ },
547
+ {
548
+ "epoch": 3.85,
549
+ "learning_rate": 8.886839565347797e-05,
550
+ "loss": 0.2581,
551
+ "step": 258
552
+ },
553
+ {
554
+ "epoch": 3.9,
555
+ "learning_rate": 8.858149629168357e-05,
556
+ "loss": 0.3605,
557
+ "step": 261
558
+ },
559
+ {
560
+ "epoch": 3.94,
561
+ "learning_rate": 8.829142234277936e-05,
562
+ "loss": 0.3091,
563
+ "step": 264
564
+ },
565
+ {
566
+ "epoch": 3.99,
567
+ "learning_rate": 8.79981976748137e-05,
568
+ "loss": 0.2594,
569
+ "step": 267
570
+ },
571
+ {
572
+ "epoch": 4.0,
573
+ "eval_loss": 0.49737870693206787,
574
+ "eval_matthews_correlation": 0.5395539646127814,
575
+ "eval_runtime": 1.0788,
576
+ "eval_samples_per_second": 966.811,
577
+ "eval_steps_per_second": 61.179,
578
+ "step": 268
579
+ },
580
+ {
581
+ "epoch": 4.03,
582
+ "learning_rate": 8.770184641508439e-05,
583
+ "loss": 0.2586,
584
+ "step": 270
585
+ },
586
+ {
587
+ "epoch": 4.07,
588
+ "learning_rate": 8.740239294815345e-05,
589
+ "loss": 0.2208,
590
+ "step": 273
591
+ },
592
+ {
593
+ "epoch": 4.12,
594
+ "learning_rate": 8.70998619138407e-05,
595
+ "loss": 0.1708,
596
+ "step": 276
597
+ },
598
+ {
599
+ "epoch": 4.16,
600
+ "learning_rate": 8.679427820519625e-05,
601
+ "loss": 0.2191,
602
+ "step": 279
603
+ },
604
+ {
605
+ "epoch": 4.21,
606
+ "learning_rate": 8.648566696645233e-05,
607
+ "loss": 0.2213,
608
+ "step": 282
609
+ },
610
+ {
611
+ "epoch": 4.25,
612
+ "learning_rate": 8.617405359095437e-05,
613
+ "loss": 0.2575,
614
+ "step": 285
615
+ },
616
+ {
617
+ "epoch": 4.3,
618
+ "learning_rate": 8.585946371907138e-05,
619
+ "loss": 0.2006,
620
+ "step": 288
621
+ },
622
+ {
623
+ "epoch": 4.34,
624
+ "learning_rate": 8.55419232360865e-05,
625
+ "loss": 0.2023,
626
+ "step": 291
627
+ },
628
+ {
629
+ "epoch": 4.39,
630
+ "learning_rate": 8.522145827006675e-05,
631
+ "loss": 0.1837,
632
+ "step": 294
633
+ },
634
+ {
635
+ "epoch": 4.43,
636
+ "learning_rate": 8.489809518971348e-05,
637
+ "loss": 0.2486,
638
+ "step": 297
639
+ },
640
+ {
641
+ "epoch": 4.48,
642
+ "learning_rate": 8.457186060219239e-05,
643
+ "loss": 0.2044,
644
+ "step": 300
645
+ },
646
+ {
647
+ "epoch": 4.52,
648
+ "learning_rate": 8.42427813509444e-05,
649
+ "loss": 0.1813,
650
+ "step": 303
651
+ },
652
+ {
653
+ "epoch": 4.57,
654
+ "learning_rate": 8.391088451347688e-05,
655
+ "loss": 0.2795,
656
+ "step": 306
657
+ },
658
+ {
659
+ "epoch": 4.61,
660
+ "learning_rate": 8.357619739913557e-05,
661
+ "loss": 0.1769,
662
+ "step": 309
663
+ },
664
+ {
665
+ "epoch": 4.66,
666
+ "learning_rate": 8.323874754685755e-05,
667
+ "loss": 0.1875,
668
+ "step": 312
669
+ },
670
+ {
671
+ "epoch": 4.7,
672
+ "learning_rate": 8.289856272290527e-05,
673
+ "loss": 0.2291,
674
+ "step": 315
675
+ },
676
+ {
677
+ "epoch": 4.75,
678
+ "learning_rate": 8.255567091858182e-05,
679
+ "loss": 0.2097,
680
+ "step": 318
681
+ },
682
+ {
683
+ "epoch": 4.79,
684
+ "learning_rate": 8.22101003479278e-05,
685
+ "loss": 0.2009,
686
+ "step": 321
687
+ },
688
+ {
689
+ "epoch": 4.84,
690
+ "learning_rate": 8.186187944539973e-05,
691
+ "loss": 0.2932,
692
+ "step": 324
693
+ },
694
+ {
695
+ "epoch": 4.88,
696
+ "learning_rate": 8.151103686353042e-05,
697
+ "loss": 0.2447,
698
+ "step": 327
699
+ },
700
+ {
701
+ "epoch": 4.93,
702
+ "learning_rate": 8.115760147057138e-05,
703
+ "loss": 0.202,
704
+ "step": 330
705
+ },
706
+ {
707
+ "epoch": 4.97,
708
+ "learning_rate": 8.080160234811742e-05,
709
+ "loss": 0.21,
710
+ "step": 333
711
+ },
712
+ {
713
+ "epoch": 5.0,
714
+ "eval_loss": 0.5594205856323242,
715
+ "eval_matthews_correlation": 0.5181917740456299,
716
+ "eval_runtime": 1.065,
717
+ "eval_samples_per_second": 979.357,
718
+ "eval_steps_per_second": 61.973,
719
+ "step": 335
720
+ },
721
+ {
722
+ "epoch": 5.01,
723
+ "learning_rate": 8.044306878871375e-05,
724
+ "loss": 0.2023,
725
+ "step": 336
726
+ },
727
+ {
728
+ "epoch": 5.06,
729
+ "learning_rate": 8.00820302934458e-05,
730
+ "loss": 0.1664,
731
+ "step": 339
732
+ },
733
+ {
734
+ "epoch": 5.1,
735
+ "learning_rate": 7.971851656951161e-05,
736
+ "loss": 0.2118,
737
+ "step": 342
738
+ },
739
+ {
740
+ "epoch": 5.15,
741
+ "learning_rate": 7.935255752777764e-05,
742
+ "loss": 0.1459,
743
+ "step": 345
744
+ },
745
+ {
746
+ "epoch": 5.19,
747
+ "learning_rate": 7.898418328031752e-05,
748
+ "loss": 0.1239,
749
+ "step": 348
750
+ },
751
+ {
752
+ "epoch": 5.24,
753
+ "learning_rate": 7.861342413793433e-05,
754
+ "loss": 0.2143,
755
+ "step": 351
756
+ },
757
+ {
758
+ "epoch": 5.28,
759
+ "learning_rate": 7.824031060766662e-05,
760
+ "loss": 0.1456,
761
+ "step": 354
762
+ },
763
+ {
764
+ "epoch": 5.33,
765
+ "learning_rate": 7.786487339027815e-05,
766
+ "loss": 0.1705,
767
+ "step": 357
768
+ },
769
+ {
770
+ "epoch": 5.37,
771
+ "learning_rate": 7.748714337773179e-05,
772
+ "loss": 0.1135,
773
+ "step": 360
774
+ },
775
+ {
776
+ "epoch": 5.42,
777
+ "learning_rate": 7.710715165064765e-05,
778
+ "loss": 0.2111,
779
+ "step": 363
780
+ },
781
+ {
782
+ "epoch": 5.46,
783
+ "learning_rate": 7.672492947574566e-05,
784
+ "loss": 0.2572,
785
+ "step": 366
786
+ },
787
+ {
788
+ "epoch": 5.51,
789
+ "learning_rate": 7.634050830327282e-05,
790
+ "loss": 0.2522,
791
+ "step": 369
792
+ },
793
+ {
794
+ "epoch": 5.55,
795
+ "learning_rate": 7.59539197644155e-05,
796
+ "loss": 0.1322,
797
+ "step": 372
798
+ },
799
+ {
800
+ "epoch": 5.6,
801
+ "learning_rate": 7.556519566869666e-05,
802
+ "loss": 0.224,
803
+ "step": 375
804
+ },
805
+ {
806
+ "epoch": 5.64,
807
+ "learning_rate": 7.517436800135853e-05,
808
+ "loss": 0.1831,
809
+ "step": 378
810
+ },
811
+ {
812
+ "epoch": 5.69,
813
+ "learning_rate": 7.47814689207307e-05,
814
+ "loss": 0.1731,
815
+ "step": 381
816
+ },
817
+ {
818
+ "epoch": 5.73,
819
+ "learning_rate": 7.438653075558412e-05,
820
+ "loss": 0.2192,
821
+ "step": 384
822
+ },
823
+ {
824
+ "epoch": 5.78,
825
+ "learning_rate": 7.398958600247103e-05,
826
+ "loss": 0.1522,
827
+ "step": 387
828
+ },
829
+ {
830
+ "epoch": 5.82,
831
+ "learning_rate": 7.359066732305095e-05,
832
+ "loss": 0.2241,
833
+ "step": 390
834
+ },
835
+ {
836
+ "epoch": 5.87,
837
+ "learning_rate": 7.318980754140326e-05,
838
+ "loss": 0.2172,
839
+ "step": 393
840
+ },
841
+ {
842
+ "epoch": 5.91,
843
+ "learning_rate": 7.278703964132639e-05,
844
+ "loss": 0.1706,
845
+ "step": 396
846
+ },
847
+ {
848
+ "epoch": 5.96,
849
+ "learning_rate": 7.238239676362372e-05,
850
+ "loss": 0.1813,
851
+ "step": 399
852
+ },
853
+ {
854
+ "epoch": 6.0,
855
+ "learning_rate": 7.197591220337679e-05,
856
+ "loss": 0.1526,
857
+ "step": 402
858
+ },
859
+ {
860
+ "epoch": 6.0,
861
+ "eval_loss": 0.5715296268463135,
862
+ "eval_matthews_correlation": 0.5149844966342378,
863
+ "eval_runtime": 1.0444,
864
+ "eval_samples_per_second": 998.702,
865
+ "eval_steps_per_second": 63.197,
866
+ "step": 402
867
+ },
868
+ {
869
+ "epoch": 6.04,
870
+ "learning_rate": 7.156761940720555e-05,
871
+ "loss": 0.1491,
872
+ "step": 405
873
+ },
874
+ {
875
+ "epoch": 6.09,
876
+ "learning_rate": 7.115755197051645e-05,
877
+ "loss": 0.1685,
878
+ "step": 408
879
+ },
880
+ {
881
+ "epoch": 6.13,
882
+ "learning_rate": 7.074574363473798e-05,
883
+ "loss": 0.1871,
884
+ "step": 411
885
+ },
886
+ {
887
+ "epoch": 6.18,
888
+ "learning_rate": 7.033222828454442e-05,
889
+ "loss": 0.1312,
890
+ "step": 414
891
+ },
892
+ {
893
+ "epoch": 6.22,
894
+ "learning_rate": 6.991703994506761e-05,
895
+ "loss": 0.1746,
896
+ "step": 417
897
+ },
898
+ {
899
+ "epoch": 6.27,
900
+ "learning_rate": 6.950021277909749e-05,
901
+ "loss": 0.1484,
902
+ "step": 420
903
+ },
904
+ {
905
+ "epoch": 6.31,
906
+ "learning_rate": 6.908178108427088e-05,
907
+ "loss": 0.1476,
908
+ "step": 423
909
+ },
910
+ {
911
+ "epoch": 6.36,
912
+ "learning_rate": 6.866177929024945e-05,
913
+ "loss": 0.1404,
914
+ "step": 426
915
+ },
916
+ {
917
+ "epoch": 6.4,
918
+ "learning_rate": 6.824024195588677e-05,
919
+ "loss": 0.1335,
920
+ "step": 429
921
+ },
922
+ {
923
+ "epoch": 6.45,
924
+ "learning_rate": 6.781720376638477e-05,
925
+ "loss": 0.1782,
926
+ "step": 432
927
+ },
928
+ {
929
+ "epoch": 6.49,
930
+ "learning_rate": 6.739269953043959e-05,
931
+ "loss": 0.1941,
932
+ "step": 435
933
+ },
934
+ {
935
+ "epoch": 6.54,
936
+ "learning_rate": 6.696676417737764e-05,
937
+ "loss": 0.1558,
938
+ "step": 438
939
+ },
940
+ {
941
+ "epoch": 6.58,
942
+ "learning_rate": 6.653943275428135e-05,
943
+ "loss": 0.1311,
944
+ "step": 441
945
+ },
946
+ {
947
+ "epoch": 6.63,
948
+ "learning_rate": 6.611074042310549e-05,
949
+ "loss": 0.128,
950
+ "step": 444
951
+ },
952
+ {
953
+ "epoch": 6.67,
954
+ "learning_rate": 6.568072245778394e-05,
955
+ "loss": 0.1374,
956
+ "step": 447
957
+ },
958
+ {
959
+ "epoch": 6.72,
960
+ "learning_rate": 6.524941424132719e-05,
961
+ "loss": 0.1491,
962
+ "step": 450
963
+ },
964
+ {
965
+ "epoch": 6.76,
966
+ "learning_rate": 6.481685126291106e-05,
967
+ "loss": 0.1242,
968
+ "step": 453
969
+ },
970
+ {
971
+ "epoch": 6.81,
972
+ "learning_rate": 6.438306911495648e-05,
973
+ "loss": 0.1379,
974
+ "step": 456
975
+ },
976
+ {
977
+ "epoch": 6.85,
978
+ "learning_rate": 6.394810349020083e-05,
979
+ "loss": 0.168,
980
+ "step": 459
981
+ },
982
+ {
983
+ "epoch": 6.9,
984
+ "learning_rate": 6.351199017876106e-05,
985
+ "loss": 0.1601,
986
+ "step": 462
987
+ },
988
+ {
989
+ "epoch": 6.94,
990
+ "learning_rate": 6.30747650651889e-05,
991
+ "loss": 0.1641,
992
+ "step": 465
993
+ },
994
+ {
995
+ "epoch": 6.99,
996
+ "learning_rate": 6.263646412551794e-05,
997
+ "loss": 0.1775,
998
+ "step": 468
999
+ },
1000
+ {
1001
+ "epoch": 7.0,
1002
+ "eval_loss": 0.6637021899223328,
1003
+ "eval_matthews_correlation": 0.5019828461798207,
1004
+ "eval_runtime": 1.1986,
1005
+ "eval_samples_per_second": 870.206,
1006
+ "eval_steps_per_second": 55.066,
1007
+ "step": 469
1008
+ },
1009
+ {
1010
+ "epoch": 7.03,
1011
+ "learning_rate": 6.219712342430371e-05,
1012
+ "loss": 0.1263,
1013
+ "step": 471
1014
+ },
1015
+ {
1016
+ "epoch": 7.07,
1017
+ "learning_rate": 6.175677911165599e-05,
1018
+ "loss": 0.114,
1019
+ "step": 474
1020
+ },
1021
+ {
1022
+ "epoch": 7.12,
1023
+ "learning_rate": 6.131546742026438e-05,
1024
+ "loss": 0.0823,
1025
+ "step": 477
1026
+ },
1027
+ {
1028
+ "epoch": 7.16,
1029
+ "learning_rate": 6.0873224662416896e-05,
1030
+ "loss": 0.1068,
1031
+ "step": 480
1032
+ },
1033
+ {
1034
+ "epoch": 7.21,
1035
+ "learning_rate": 6.04300872270122e-05,
1036
+ "loss": 0.1067,
1037
+ "step": 483
1038
+ },
1039
+ {
1040
+ "epoch": 7.25,
1041
+ "learning_rate": 5.998609157656539e-05,
1042
+ "loss": 0.1026,
1043
+ "step": 486
1044
+ },
1045
+ {
1046
+ "epoch": 7.3,
1047
+ "learning_rate": 5.954127424420773e-05,
1048
+ "loss": 0.1208,
1049
+ "step": 489
1050
+ },
1051
+ {
1052
+ "epoch": 7.34,
1053
+ "learning_rate": 5.9095671830680656e-05,
1054
+ "loss": 0.0778,
1055
+ "step": 492
1056
+ },
1057
+ {
1058
+ "epoch": 7.39,
1059
+ "learning_rate": 5.864932100132411e-05,
1060
+ "loss": 0.1066,
1061
+ "step": 495
1062
+ },
1063
+ {
1064
+ "epoch": 7.43,
1065
+ "learning_rate": 5.82022584830597e-05,
1066
+ "loss": 0.1086,
1067
+ "step": 498
1068
+ },
1069
+ {
1070
+ "epoch": 7.48,
1071
+ "learning_rate": 5.7754521061368684e-05,
1072
+ "loss": 0.1833,
1073
+ "step": 501
1074
+ },
1075
+ {
1076
+ "epoch": 7.52,
1077
+ "learning_rate": 5.730614557726509e-05,
1078
+ "loss": 0.1193,
1079
+ "step": 504
1080
+ },
1081
+ {
1082
+ "epoch": 7.57,
1083
+ "learning_rate": 5.685716892426445e-05,
1084
+ "loss": 0.1628,
1085
+ "step": 507
1086
+ },
1087
+ {
1088
+ "epoch": 7.61,
1089
+ "learning_rate": 5.640762804534806e-05,
1090
+ "loss": 0.1774,
1091
+ "step": 510
1092
+ },
1093
+ {
1094
+ "epoch": 7.66,
1095
+ "learning_rate": 5.595755992992317e-05,
1096
+ "loss": 0.1434,
1097
+ "step": 513
1098
+ },
1099
+ {
1100
+ "epoch": 7.7,
1101
+ "learning_rate": 5.550700161077945e-05,
1102
+ "loss": 0.0957,
1103
+ "step": 516
1104
+ },
1105
+ {
1106
+ "epoch": 7.75,
1107
+ "learning_rate": 5.505599016104187e-05,
1108
+ "loss": 0.1219,
1109
+ "step": 519
1110
+ },
1111
+ {
1112
+ "epoch": 7.79,
1113
+ "learning_rate": 5.460456269112013e-05,
1114
+ "loss": 0.1562,
1115
+ "step": 522
1116
+ },
1117
+ {
1118
+ "epoch": 7.84,
1119
+ "learning_rate": 5.415275634565517e-05,
1120
+ "loss": 0.136,
1121
+ "step": 525
1122
+ },
1123
+ {
1124
+ "epoch": 7.88,
1125
+ "learning_rate": 5.370060830046282e-05,
1126
+ "loss": 0.0868,
1127
+ "step": 528
1128
+ },
1129
+ {
1130
+ "epoch": 7.93,
1131
+ "learning_rate": 5.3248155759474846e-05,
1132
+ "loss": 0.1028,
1133
+ "step": 531
1134
+ },
1135
+ {
1136
+ "epoch": 7.97,
1137
+ "learning_rate": 5.2795435951677785e-05,
1138
+ "loss": 0.1681,
1139
+ "step": 534
1140
+ },
1141
+ {
1142
+ "epoch": 8.0,
1143
+ "eval_loss": 0.6957959532737732,
1144
+ "eval_matthews_correlation": 0.5131045571647604,
1145
+ "eval_runtime": 1.0682,
1146
+ "eval_samples_per_second": 976.38,
1147
+ "eval_steps_per_second": 61.784,
1148
+ "step": 536
1149
+ },
1150
+ {
1151
+ "epoch": 8.01,
1152
+ "learning_rate": 5.234248612804952e-05,
1153
+ "loss": 0.1163,
1154
+ "step": 537
1155
+ },
1156
+ {
1157
+ "epoch": 8.06,
1158
+ "learning_rate": 5.1889343558494266e-05,
1159
+ "loss": 0.1022,
1160
+ "step": 540
1161
+ },
1162
+ {
1163
+ "epoch": 8.1,
1164
+ "learning_rate": 5.14360455287759e-05,
1165
+ "loss": 0.0854,
1166
+ "step": 543
1167
+ },
1168
+ {
1169
+ "epoch": 8.15,
1170
+ "learning_rate": 5.098262933744994e-05,
1171
+ "loss": 0.112,
1172
+ "step": 546
1173
+ },
1174
+ {
1175
+ "epoch": 8.19,
1176
+ "learning_rate": 5.052913229279459e-05,
1177
+ "loss": 0.1067,
1178
+ "step": 549
1179
+ },
1180
+ {
1181
+ "epoch": 8.24,
1182
+ "learning_rate": 5.007559170974084e-05,
1183
+ "loss": 0.083,
1184
+ "step": 552
1185
+ },
1186
+ {
1187
+ "epoch": 8.28,
1188
+ "learning_rate": 4.962204490680216e-05,
1189
+ "loss": 0.085,
1190
+ "step": 555
1191
+ },
1192
+ {
1193
+ "epoch": 8.33,
1194
+ "learning_rate": 4.9168529203003814e-05,
1195
+ "loss": 0.1075,
1196
+ "step": 558
1197
+ },
1198
+ {
1199
+ "epoch": 8.37,
1200
+ "learning_rate": 4.871508191481211e-05,
1201
+ "loss": 0.1416,
1202
+ "step": 561
1203
+ },
1204
+ {
1205
+ "epoch": 8.42,
1206
+ "learning_rate": 4.826174035306398e-05,
1207
+ "loss": 0.1248,
1208
+ "step": 564
1209
+ },
1210
+ {
1211
+ "epoch": 8.46,
1212
+ "learning_rate": 4.7808541819896885e-05,
1213
+ "loss": 0.0748,
1214
+ "step": 567
1215
+ },
1216
+ {
1217
+ "epoch": 8.51,
1218
+ "learning_rate": 4.735552360567952e-05,
1219
+ "loss": 0.0707,
1220
+ "step": 570
1221
+ },
1222
+ {
1223
+ "epoch": 8.55,
1224
+ "learning_rate": 4.6902722985943444e-05,
1225
+ "loss": 0.0778,
1226
+ "step": 573
1227
+ },
1228
+ {
1229
+ "epoch": 8.6,
1230
+ "learning_rate": 4.645017721831602e-05,
1231
+ "loss": 0.1447,
1232
+ "step": 576
1233
+ },
1234
+ {
1235
+ "epoch": 8.64,
1236
+ "learning_rate": 4.599792353945466e-05,
1237
+ "loss": 0.0839,
1238
+ "step": 579
1239
+ },
1240
+ {
1241
+ "epoch": 8.69,
1242
+ "learning_rate": 4.5545999161982953e-05,
1243
+ "loss": 0.1268,
1244
+ "step": 582
1245
+ },
1246
+ {
1247
+ "epoch": 8.73,
1248
+ "learning_rate": 4.509444127142871e-05,
1249
+ "loss": 0.0705,
1250
+ "step": 585
1251
+ },
1252
+ {
1253
+ "epoch": 8.78,
1254
+ "learning_rate": 4.464328702316427e-05,
1255
+ "loss": 0.1196,
1256
+ "step": 588
1257
+ },
1258
+ {
1259
+ "epoch": 8.82,
1260
+ "learning_rate": 4.419257353934915e-05,
1261
+ "loss": 0.1071,
1262
+ "step": 591
1263
+ },
1264
+ {
1265
+ "epoch": 8.87,
1266
+ "learning_rate": 4.374233790587565e-05,
1267
+ "loss": 0.1177,
1268
+ "step": 594
1269
+ },
1270
+ {
1271
+ "epoch": 8.91,
1272
+ "learning_rate": 4.329261716931727e-05,
1273
+ "loss": 0.104,
1274
+ "step": 597
1275
+ },
1276
+ {
1277
+ "epoch": 8.96,
1278
+ "learning_rate": 4.284344833388047e-05,
1279
+ "loss": 0.0596,
1280
+ "step": 600
1281
+ },
1282
+ {
1283
+ "epoch": 9.0,
1284
+ "learning_rate": 4.2394868358359774e-05,
1285
+ "loss": 0.124,
1286
+ "step": 603
1287
+ },
1288
+ {
1289
+ "epoch": 9.0,
1290
+ "eval_loss": 0.7057417631149292,
1291
+ "eval_matthews_correlation": 0.5153742778418894,
1292
+ "eval_runtime": 1.2043,
1293
+ "eval_samples_per_second": 866.095,
1294
+ "eval_steps_per_second": 54.806,
1295
+ "step": 603
1296
+ },
1297
+ {
1298
+ "epoch": 9.04,
1299
+ "learning_rate": 4.1946914153096795e-05,
1300
+ "loss": 0.0838,
1301
+ "step": 606
1302
+ },
1303
+ {
1304
+ "epoch": 9.09,
1305
+ "learning_rate": 4.149962257694315e-05,
1306
+ "loss": 0.0915,
1307
+ "step": 609
1308
+ },
1309
+ {
1310
+ "epoch": 9.13,
1311
+ "learning_rate": 4.105303043422753e-05,
1312
+ "loss": 0.0818,
1313
+ "step": 612
1314
+ },
1315
+ {
1316
+ "epoch": 9.18,
1317
+ "learning_rate": 4.060717447172743e-05,
1318
+ "loss": 0.0508,
1319
+ "step": 615
1320
+ },
1321
+ {
1322
+ "epoch": 9.22,
1323
+ "learning_rate": 4.0162091375645493e-05,
1324
+ "loss": 0.1156,
1325
+ "step": 618
1326
+ },
1327
+ {
1328
+ "epoch": 9.27,
1329
+ "learning_rate": 3.971781776859093e-05,
1330
+ "loss": 0.0697,
1331
+ "step": 621
1332
+ },
1333
+ {
1334
+ "epoch": 9.31,
1335
+ "learning_rate": 3.9274390206565956e-05,
1336
+ "loss": 0.0563,
1337
+ "step": 624
1338
+ },
1339
+ {
1340
+ "epoch": 9.36,
1341
+ "learning_rate": 3.883184517595807e-05,
1342
+ "loss": 0.0993,
1343
+ "step": 627
1344
+ },
1345
+ {
1346
+ "epoch": 9.4,
1347
+ "learning_rate": 3.83902190905377e-05,
1348
+ "loss": 0.0621,
1349
+ "step": 630
1350
+ },
1351
+ {
1352
+ "epoch": 9.45,
1353
+ "learning_rate": 3.794954828846208e-05,
1354
+ "loss": 0.0788,
1355
+ "step": 633
1356
+ },
1357
+ {
1358
+ "epoch": 9.49,
1359
+ "learning_rate": 3.7509869029285215e-05,
1360
+ "loss": 0.1224,
1361
+ "step": 636
1362
+ },
1363
+ {
1364
+ "epoch": 9.54,
1365
+ "learning_rate": 3.707121749097431e-05,
1366
+ "loss": 0.0759,
1367
+ "step": 639
1368
+ },
1369
+ {
1370
+ "epoch": 9.58,
1371
+ "learning_rate": 3.663362976693304e-05,
1372
+ "loss": 0.0925,
1373
+ "step": 642
1374
+ },
1375
+ {
1376
+ "epoch": 9.63,
1377
+ "learning_rate": 3.619714186303162e-05,
1378
+ "loss": 0.0908,
1379
+ "step": 645
1380
+ },
1381
+ {
1382
+ "epoch": 9.67,
1383
+ "learning_rate": 3.576178969464414e-05,
1384
+ "loss": 0.12,
1385
+ "step": 648
1386
+ },
1387
+ {
1388
+ "epoch": 9.72,
1389
+ "learning_rate": 3.532760908369344e-05,
1390
+ "loss": 0.0629,
1391
+ "step": 651
1392
+ },
1393
+ {
1394
+ "epoch": 9.76,
1395
+ "learning_rate": 3.489463575570349e-05,
1396
+ "loss": 0.0506,
1397
+ "step": 654
1398
+ },
1399
+ {
1400
+ "epoch": 9.81,
1401
+ "learning_rate": 3.446290533685984e-05,
1402
+ "loss": 0.0738,
1403
+ "step": 657
1404
+ },
1405
+ {
1406
+ "epoch": 9.85,
1407
+ "learning_rate": 3.403245335107822e-05,
1408
+ "loss": 0.0586,
1409
+ "step": 660
1410
+ },
1411
+ {
1412
+ "epoch": 9.9,
1413
+ "learning_rate": 3.360331521708149e-05,
1414
+ "loss": 0.106,
1415
+ "step": 663
1416
+ },
1417
+ {
1418
+ "epoch": 9.94,
1419
+ "learning_rate": 3.3175526245485366e-05,
1420
+ "loss": 0.111,
1421
+ "step": 666
1422
+ },
1423
+ {
1424
+ "epoch": 9.99,
1425
+ "learning_rate": 3.274912163589291e-05,
1426
+ "loss": 0.1111,
1427
+ "step": 669
1428
+ },
1429
+ {
1430
+ "epoch": 10.0,
1431
+ "eval_loss": 0.8173357844352722,
1432
+ "eval_matthews_correlation": 0.5074384885743003,
1433
+ "eval_runtime": 1.0523,
1434
+ "eval_samples_per_second": 991.19,
1435
+ "eval_steps_per_second": 62.722,
1436
+ "step": 670
1437
+ },
1438
+ {
1439
+ "epoch": 10.03,
1440
+ "learning_rate": 3.2324136473998204e-05,
1441
+ "loss": 0.0763,
1442
+ "step": 672
1443
+ },
1444
+ {
1445
+ "epoch": 10.07,
1446
+ "learning_rate": 3.190060572869948e-05,
1447
+ "loss": 0.07,
1448
+ "step": 675
1449
+ },
1450
+ {
1451
+ "epoch": 10.12,
1452
+ "learning_rate": 3.147856424922174e-05,
1453
+ "loss": 0.051,
1454
+ "step": 678
1455
+ },
1456
+ {
1457
+ "epoch": 10.16,
1458
+ "learning_rate": 3.1058046762249224e-05,
1459
+ "loss": 0.0454,
1460
+ "step": 681
1461
+ },
1462
+ {
1463
+ "epoch": 10.21,
1464
+ "learning_rate": 3.063908786906812e-05,
1465
+ "loss": 0.0952,
1466
+ "step": 684
1467
+ },
1468
+ {
1469
+ "epoch": 10.25,
1470
+ "learning_rate": 3.022172204271938e-05,
1471
+ "loss": 0.0582,
1472
+ "step": 687
1473
+ },
1474
+ {
1475
+ "epoch": 10.3,
1476
+ "learning_rate": 2.9805983625162227e-05,
1477
+ "loss": 0.0653,
1478
+ "step": 690
1479
+ },
1480
+ {
1481
+ "epoch": 10.34,
1482
+ "learning_rate": 2.93919068244484e-05,
1483
+ "loss": 0.0617,
1484
+ "step": 693
1485
+ },
1486
+ {
1487
+ "epoch": 10.39,
1488
+ "learning_rate": 2.897952571190743e-05,
1489
+ "loss": 0.0648,
1490
+ "step": 696
1491
+ },
1492
+ {
1493
+ "epoch": 10.43,
1494
+ "learning_rate": 2.8568874219343155e-05,
1495
+ "loss": 0.0503,
1496
+ "step": 699
1497
+ },
1498
+ {
1499
+ "epoch": 10.48,
1500
+ "learning_rate": 2.8159986136241732e-05,
1501
+ "loss": 0.1065,
1502
+ "step": 702
1503
+ },
1504
+ {
1505
+ "epoch": 10.52,
1506
+ "learning_rate": 2.7752895106991384e-05,
1507
+ "loss": 0.1168,
1508
+ "step": 705
1509
+ },
1510
+ {
1511
+ "epoch": 10.57,
1512
+ "learning_rate": 2.7347634628113916e-05,
1513
+ "loss": 0.0471,
1514
+ "step": 708
1515
+ },
1516
+ {
1517
+ "epoch": 10.61,
1518
+ "learning_rate": 2.6944238045508708e-05,
1519
+ "loss": 0.0782,
1520
+ "step": 711
1521
+ },
1522
+ {
1523
+ "epoch": 10.66,
1524
+ "learning_rate": 2.6542738551708828e-05,
1525
+ "loss": 0.0773,
1526
+ "step": 714
1527
+ },
1528
+ {
1529
+ "epoch": 10.7,
1530
+ "learning_rate": 2.6143169183149874e-05,
1531
+ "loss": 0.0629,
1532
+ "step": 717
1533
+ },
1534
+ {
1535
+ "epoch": 10.75,
1536
+ "learning_rate": 2.5745562817451686e-05,
1537
+ "loss": 0.0662,
1538
+ "step": 720
1539
+ },
1540
+ {
1541
+ "epoch": 10.79,
1542
+ "learning_rate": 2.5349952170712977e-05,
1543
+ "loss": 0.0882,
1544
+ "step": 723
1545
+ },
1546
+ {
1547
+ "epoch": 10.84,
1548
+ "learning_rate": 2.4956369794819535e-05,
1549
+ "loss": 0.0508,
1550
+ "step": 726
1551
+ },
1552
+ {
1553
+ "epoch": 10.88,
1554
+ "learning_rate": 2.4564848074765674e-05,
1555
+ "loss": 0.0855,
1556
+ "step": 729
1557
+ },
1558
+ {
1559
+ "epoch": 10.93,
1560
+ "learning_rate": 2.417541922598945e-05,
1561
+ "loss": 0.0617,
1562
+ "step": 732
1563
+ },
1564
+ {
1565
+ "epoch": 10.97,
1566
+ "learning_rate": 2.378811529172203e-05,
1567
+ "loss": 0.1332,
1568
+ "step": 735
1569
+ },
1570
+ {
1571
+ "epoch": 11.0,
1572
+ "eval_loss": 0.8252650499343872,
1573
+ "eval_matthews_correlation": 0.5260499940984096,
1574
+ "eval_runtime": 1.0581,
1575
+ "eval_samples_per_second": 985.733,
1576
+ "eval_steps_per_second": 62.376,
1577
+ "step": 737
1578
+ },
1579
+ {
1580
+ "epoch": 11.01,
1581
+ "learning_rate": 2.340296814035101e-05,
1582
+ "loss": 0.0843,
1583
+ "step": 738
1584
+ },
1585
+ {
1586
+ "epoch": 11.06,
1587
+ "learning_rate": 2.3020009462798163e-05,
1588
+ "loss": 0.0988,
1589
+ "step": 741
1590
+ },
1591
+ {
1592
+ "epoch": 11.1,
1593
+ "learning_rate": 2.263927076991193e-05,
1594
+ "loss": 0.0605,
1595
+ "step": 744
1596
+ },
1597
+ {
1598
+ "epoch": 11.15,
1599
+ "learning_rate": 2.226078338987453e-05,
1600
+ "loss": 0.0337,
1601
+ "step": 747
1602
+ },
1603
+ {
1604
+ "epoch": 11.19,
1605
+ "learning_rate": 2.1884578465624257e-05,
1606
+ "loss": 0.0412,
1607
+ "step": 750
1608
+ },
1609
+ {
1610
+ "epoch": 11.24,
1611
+ "learning_rate": 2.1510686952292934e-05,
1612
+ "loss": 0.0611,
1613
+ "step": 753
1614
+ },
1615
+ {
1616
+ "epoch": 11.28,
1617
+ "learning_rate": 2.1139139614658798e-05,
1618
+ "loss": 0.0587,
1619
+ "step": 756
1620
+ },
1621
+ {
1622
+ "epoch": 11.33,
1623
+ "learning_rate": 2.0769967024615183e-05,
1624
+ "loss": 0.0204,
1625
+ "step": 759
1626
+ },
1627
+ {
1628
+ "epoch": 11.37,
1629
+ "learning_rate": 2.0403199558654945e-05,
1630
+ "loss": 0.1346,
1631
+ "step": 762
1632
+ },
1633
+ {
1634
+ "epoch": 11.42,
1635
+ "learning_rate": 2.0038867395370936e-05,
1636
+ "loss": 0.0577,
1637
+ "step": 765
1638
+ },
1639
+ {
1640
+ "epoch": 11.46,
1641
+ "learning_rate": 1.967700051297295e-05,
1642
+ "loss": 0.0754,
1643
+ "step": 768
1644
+ },
1645
+ {
1646
+ "epoch": 11.51,
1647
+ "learning_rate": 1.931762868682098e-05,
1648
+ "loss": 0.0537,
1649
+ "step": 771
1650
+ },
1651
+ {
1652
+ "epoch": 11.55,
1653
+ "learning_rate": 1.8960781486975143e-05,
1654
+ "loss": 0.0334,
1655
+ "step": 774
1656
+ },
1657
+ {
1658
+ "epoch": 11.6,
1659
+ "learning_rate": 1.860648827576278e-05,
1660
+ "loss": 0.0651,
1661
+ "step": 777
1662
+ },
1663
+ {
1664
+ "epoch": 11.64,
1665
+ "learning_rate": 1.8254778205362206e-05,
1666
+ "loss": 0.0742,
1667
+ "step": 780
1668
+ },
1669
+ {
1670
+ "epoch": 11.69,
1671
+ "learning_rate": 1.7905680215404174e-05,
1672
+ "loss": 0.1158,
1673
+ "step": 783
1674
+ },
1675
+ {
1676
+ "epoch": 11.73,
1677
+ "learning_rate": 1.7559223030590577e-05,
1678
+ "loss": 0.0916,
1679
+ "step": 786
1680
+ },
1681
+ {
1682
+ "epoch": 11.78,
1683
+ "learning_rate": 1.7215435158330855e-05,
1684
+ "loss": 0.0286,
1685
+ "step": 789
1686
+ },
1687
+ {
1688
+ "epoch": 11.82,
1689
+ "learning_rate": 1.6874344886396438e-05,
1690
+ "loss": 0.0687,
1691
+ "step": 792
1692
+ },
1693
+ {
1694
+ "epoch": 11.87,
1695
+ "learning_rate": 1.65359802805931e-05,
1696
+ "loss": 0.0705,
1697
+ "step": 795
1698
+ },
1699
+ {
1700
+ "epoch": 11.91,
1701
+ "learning_rate": 1.6200369182451564e-05,
1702
+ "loss": 0.0717,
1703
+ "step": 798
1704
+ },
1705
+ {
1706
+ "epoch": 11.96,
1707
+ "learning_rate": 1.586753920693676e-05,
1708
+ "loss": 0.0445,
1709
+ "step": 801
1710
+ },
1711
+ {
1712
+ "epoch": 12.0,
1713
+ "learning_rate": 1.553751774017551e-05,
1714
+ "loss": 0.0673,
1715
+ "step": 804
1716
+ },
1717
+ {
1718
+ "epoch": 12.0,
1719
+ "eval_loss": 0.8085535764694214,
1720
+ "eval_matthews_correlation": 0.5179780196184617,
1721
+ "eval_runtime": 1.0516,
1722
+ "eval_samples_per_second": 991.801,
1723
+ "eval_steps_per_second": 62.76,
1724
+ "step": 804
1725
+ },
1726
+ {
1727
+ "epoch": 12.04,
1728
+ "learning_rate": 1.5210331937203088e-05,
1729
+ "loss": 0.094,
1730
+ "step": 807
1731
+ },
1732
+ {
1733
+ "epoch": 12.09,
1734
+ "learning_rate": 1.4886008719728989e-05,
1735
+ "loss": 0.0523,
1736
+ "step": 810
1737
+ },
1738
+ {
1739
+ "epoch": 12.13,
1740
+ "learning_rate": 1.4564574773921514e-05,
1741
+ "loss": 0.0667,
1742
+ "step": 813
1743
+ },
1744
+ {
1745
+ "epoch": 12.18,
1746
+ "learning_rate": 1.4246056548212172e-05,
1747
+ "loss": 0.0533,
1748
+ "step": 816
1749
+ },
1750
+ {
1751
+ "epoch": 12.22,
1752
+ "learning_rate": 1.3930480251119321e-05,
1753
+ "loss": 0.0358,
1754
+ "step": 819
1755
+ },
1756
+ {
1757
+ "epoch": 12.27,
1758
+ "learning_rate": 1.3617871849091657e-05,
1759
+ "loss": 0.0532,
1760
+ "step": 822
1761
+ },
1762
+ {
1763
+ "epoch": 12.31,
1764
+ "learning_rate": 1.3308257064371677e-05,
1765
+ "loss": 0.0208,
1766
+ "step": 825
1767
+ },
1768
+ {
1769
+ "epoch": 12.36,
1770
+ "learning_rate": 1.3001661372879192e-05,
1771
+ "loss": 0.0506,
1772
+ "step": 828
1773
+ },
1774
+ {
1775
+ "epoch": 12.4,
1776
+ "learning_rate": 1.2698110002115004e-05,
1777
+ "loss": 0.0827,
1778
+ "step": 831
1779
+ },
1780
+ {
1781
+ "epoch": 12.45,
1782
+ "learning_rate": 1.2397627929085248e-05,
1783
+ "loss": 0.0764,
1784
+ "step": 834
1785
+ },
1786
+ {
1787
+ "epoch": 12.49,
1788
+ "learning_rate": 1.2100239878246133e-05,
1789
+ "loss": 0.0863,
1790
+ "step": 837
1791
+ },
1792
+ {
1793
+ "epoch": 12.54,
1794
+ "learning_rate": 1.1805970319469589e-05,
1795
+ "loss": 0.0572,
1796
+ "step": 840
1797
+ },
1798
+ {
1799
+ "epoch": 12.58,
1800
+ "learning_rate": 1.1514843466029807e-05,
1801
+ "loss": 0.103,
1802
+ "step": 843
1803
+ },
1804
+ {
1805
+ "epoch": 12.63,
1806
+ "learning_rate": 1.1226883272610877e-05,
1807
+ "loss": 0.0234,
1808
+ "step": 846
1809
+ },
1810
+ {
1811
+ "epoch": 12.67,
1812
+ "learning_rate": 1.0942113433335788e-05,
1813
+ "loss": 0.0521,
1814
+ "step": 849
1815
+ },
1816
+ {
1817
+ "epoch": 12.72,
1818
+ "learning_rate": 1.0660557379816816e-05,
1819
+ "loss": 0.0459,
1820
+ "step": 852
1821
+ },
1822
+ {
1823
+ "epoch": 12.76,
1824
+ "learning_rate": 1.0382238279227419e-05,
1825
+ "loss": 0.0695,
1826
+ "step": 855
1827
+ },
1828
+ {
1829
+ "epoch": 12.81,
1830
+ "learning_rate": 1.0107179032396086e-05,
1831
+ "loss": 0.076,
1832
+ "step": 858
1833
+ },
1834
+ {
1835
+ "epoch": 12.85,
1836
+ "learning_rate": 9.835402271921974e-06,
1837
+ "loss": 0.0414,
1838
+ "step": 861
1839
+ },
1840
+ {
1841
+ "epoch": 12.9,
1842
+ "learning_rate": 9.56693036031256e-06,
1843
+ "loss": 0.0643,
1844
+ "step": 864
1845
+ },
1846
+ {
1847
+ "epoch": 12.94,
1848
+ "learning_rate": 9.301785388143697e-06,
1849
+ "loss": 0.0413,
1850
+ "step": 867
1851
+ },
1852
+ {
1853
+ "epoch": 12.99,
1854
+ "learning_rate": 9.039989172241886e-06,
1855
+ "loss": 0.0512,
1856
+ "step": 870
1857
+ },
1858
+ {
1859
+ "epoch": 13.0,
1860
+ "eval_loss": 0.8409435749053955,
1861
+ "eval_matthews_correlation": 0.5127766293248668,
1862
+ "eval_runtime": 1.0686,
1863
+ "eval_samples_per_second": 976.072,
1864
+ "eval_steps_per_second": 61.765,
1865
+ "step": 871
1866
+ },
1867
+ {
1868
+ "epoch": 13.03,
1869
+ "learning_rate": 8.781563253889164e-06,
1870
+ "loss": 0.0642,
1871
+ "step": 873
1872
+ },
1873
+ {
1874
+ "epoch": 13.07,
1875
+ "learning_rate": 8.52652889705059e-06,
1876
+ "loss": 0.0699,
1877
+ "step": 876
1878
+ },
1879
+ {
1880
+ "epoch": 13.12,
1881
+ "learning_rate": 8.274907086624594e-06,
1882
+ "loss": 0.0431,
1883
+ "step": 879
1884
+ },
1885
+ {
1886
+ "epoch": 13.16,
1887
+ "learning_rate": 8.026718526716342e-06,
1888
+ "loss": 0.035,
1889
+ "step": 882
1890
+ },
1891
+ {
1892
+ "epoch": 13.21,
1893
+ "learning_rate": 7.781983638934092e-06,
1894
+ "loss": 0.0535,
1895
+ "step": 885
1896
+ },
1897
+ {
1898
+ "epoch": 13.25,
1899
+ "learning_rate": 7.5407225607088405e-06,
1900
+ "loss": 0.0553,
1901
+ "step": 888
1902
+ },
1903
+ {
1904
+ "epoch": 13.3,
1905
+ "learning_rate": 7.302955143637419e-06,
1906
+ "loss": 0.0515,
1907
+ "step": 891
1908
+ },
1909
+ {
1910
+ "epoch": 13.34,
1911
+ "learning_rate": 7.068700951849011e-06,
1912
+ "loss": 0.047,
1913
+ "step": 894
1914
+ },
1915
+ {
1916
+ "epoch": 13.39,
1917
+ "learning_rate": 6.837979260395349e-06,
1918
+ "loss": 0.0787,
1919
+ "step": 897
1920
+ },
1921
+ {
1922
+ "epoch": 13.43,
1923
+ "learning_rate": 6.610809053664768e-06,
1924
+ "loss": 0.0559,
1925
+ "step": 900
1926
+ },
1927
+ {
1928
+ "epoch": 13.48,
1929
+ "learning_rate": 6.387209023820073e-06,
1930
+ "loss": 0.0669,
1931
+ "step": 903
1932
+ },
1933
+ {
1934
+ "epoch": 13.52,
1935
+ "learning_rate": 6.1671975692605185e-06,
1936
+ "loss": 0.0828,
1937
+ "step": 906
1938
+ },
1939
+ {
1940
+ "epoch": 13.57,
1941
+ "learning_rate": 5.950792793107934e-06,
1942
+ "loss": 0.0373,
1943
+ "step": 909
1944
+ },
1945
+ {
1946
+ "epoch": 13.61,
1947
+ "learning_rate": 5.738012501717144e-06,
1948
+ "loss": 0.0586,
1949
+ "step": 912
1950
+ },
1951
+ {
1952
+ "epoch": 13.66,
1953
+ "learning_rate": 5.528874203210827e-06,
1954
+ "loss": 0.034,
1955
+ "step": 915
1956
+ },
1957
+ {
1958
+ "epoch": 13.7,
1959
+ "learning_rate": 5.323395106038909e-06,
1960
+ "loss": 0.0595,
1961
+ "step": 918
1962
+ },
1963
+ {
1964
+ "epoch": 13.75,
1965
+ "learning_rate": 5.121592117562573e-06,
1966
+ "loss": 0.0788,
1967
+ "step": 921
1968
+ },
1969
+ {
1970
+ "epoch": 13.79,
1971
+ "learning_rate": 4.923481842663114e-06,
1972
+ "loss": 0.0449,
1973
+ "step": 924
1974
+ },
1975
+ {
1976
+ "epoch": 13.84,
1977
+ "learning_rate": 4.729080582375633e-06,
1978
+ "loss": 0.0459,
1979
+ "step": 927
1980
+ },
1981
+ {
1982
+ "epoch": 13.88,
1983
+ "learning_rate": 4.538404332547719e-06,
1984
+ "loss": 0.0721,
1985
+ "step": 930
1986
+ },
1987
+ {
1988
+ "epoch": 13.93,
1989
+ "learning_rate": 4.351468782523316e-06,
1990
+ "loss": 0.1029,
1991
+ "step": 933
1992
+ },
1993
+ {
1994
+ "epoch": 13.97,
1995
+ "learning_rate": 4.168289313851731e-06,
1996
+ "loss": 0.0457,
1997
+ "step": 936
1998
+ },
1999
+ {
2000
+ "epoch": 14.0,
2001
+ "eval_loss": 0.8759517073631287,
2002
+ "eval_matthews_correlation": 0.4946640161033145,
2003
+ "eval_runtime": 1.079,
2004
+ "eval_samples_per_second": 966.594,
2005
+ "eval_steps_per_second": 61.165,
2006
+ "step": 938
2007
+ },
2008
+ {
2009
+ "epoch": 14.01,
2010
+ "learning_rate": 3.9888809990219985e-06,
2011
+ "loss": 0.0621,
2012
+ "step": 939
2013
+ },
2014
+ {
2015
+ "epoch": 14.06,
2016
+ "learning_rate": 3.8132586002227076e-06,
2017
+ "loss": 0.0333,
2018
+ "step": 942
2019
+ },
2020
+ {
2021
+ "epoch": 14.1,
2022
+ "learning_rate": 3.641436568127271e-06,
2023
+ "loss": 0.0461,
2024
+ "step": 945
2025
+ },
2026
+ {
2027
+ "epoch": 14.15,
2028
+ "learning_rate": 3.4734290407049397e-06,
2029
+ "loss": 0.0532,
2030
+ "step": 948
2031
+ },
2032
+ {
2033
+ "epoch": 14.19,
2034
+ "learning_rate": 3.309249842057499e-06,
2035
+ "loss": 0.044,
2036
+ "step": 951
2037
+ },
2038
+ {
2039
+ "epoch": 14.24,
2040
+ "learning_rate": 3.148912481281713e-06,
2041
+ "loss": 0.0556,
2042
+ "step": 954
2043
+ },
2044
+ {
2045
+ "epoch": 14.28,
2046
+ "learning_rate": 2.99243015135785e-06,
2047
+ "loss": 0.0417,
2048
+ "step": 957
2049
+ },
2050
+ {
2051
+ "epoch": 14.33,
2052
+ "learning_rate": 2.839815728064077e-06,
2053
+ "loss": 0.0593,
2054
+ "step": 960
2055
+ },
2056
+ {
2057
+ "epoch": 14.37,
2058
+ "learning_rate": 2.6910817689169922e-06,
2059
+ "loss": 0.0653,
2060
+ "step": 963
2061
+ },
2062
+ {
2063
+ "epoch": 14.42,
2064
+ "learning_rate": 2.5462405121384423e-06,
2065
+ "loss": 0.0558,
2066
+ "step": 966
2067
+ },
2068
+ {
2069
+ "epoch": 14.46,
2070
+ "learning_rate": 2.405303875648418e-06,
2071
+ "loss": 0.077,
2072
+ "step": 969
2073
+ },
2074
+ {
2075
+ "epoch": 14.51,
2076
+ "learning_rate": 2.268283456084491e-06,
2077
+ "loss": 0.0763,
2078
+ "step": 972
2079
+ },
2080
+ {
2081
+ "epoch": 14.55,
2082
+ "learning_rate": 2.135190527847608e-06,
2083
+ "loss": 0.075,
2084
+ "step": 975
2085
+ },
2086
+ {
2087
+ "epoch": 14.6,
2088
+ "learning_rate": 2.0060360421743584e-06,
2089
+ "loss": 0.0172,
2090
+ "step": 978
2091
+ },
2092
+ {
2093
+ "epoch": 14.64,
2094
+ "learning_rate": 1.8808306262359243e-06,
2095
+ "loss": 0.034,
2096
+ "step": 981
2097
+ },
2098
+ {
2099
+ "epoch": 14.69,
2100
+ "learning_rate": 1.7595845822636293e-06,
2101
+ "loss": 0.0643,
2102
+ "step": 984
2103
+ },
2104
+ {
2105
+ "epoch": 14.73,
2106
+ "learning_rate": 1.6423078867012386e-06,
2107
+ "loss": 0.0568,
2108
+ "step": 987
2109
+ },
2110
+ {
2111
+ "epoch": 14.78,
2112
+ "learning_rate": 1.529010189384078e-06,
2113
+ "loss": 0.0609,
2114
+ "step": 990
2115
+ },
2116
+ {
2117
+ "epoch": 14.82,
2118
+ "learning_rate": 1.419700812745045e-06,
2119
+ "loss": 0.0454,
2120
+ "step": 993
2121
+ },
2122
+ {
2123
+ "epoch": 14.87,
2124
+ "learning_rate": 1.3143887510474629e-06,
2125
+ "loss": 0.0688,
2126
+ "step": 996
2127
+ },
2128
+ {
2129
+ "epoch": 14.91,
2130
+ "learning_rate": 1.2130826696450992e-06,
2131
+ "loss": 0.0528,
2132
+ "step": 999
2133
+ },
2134
+ {
2135
+ "epoch": 14.96,
2136
+ "learning_rate": 1.1157909042690928e-06,
2137
+ "loss": 0.0491,
2138
+ "step": 1002
2139
+ },
2140
+ {
2141
+ "epoch": 15.0,
2142
+ "learning_rate": 1.0225214603420851e-06,
2143
+ "loss": 0.04,
2144
+ "step": 1005
2145
+ },
2146
+ {
2147
+ "epoch": 15.0,
2148
+ "eval_loss": 0.8521906137466431,
2149
+ "eval_matthews_correlation": 0.5103199460790546,
2150
+ "eval_runtime": 1.1012,
2151
+ "eval_samples_per_second": 947.135,
2152
+ "eval_steps_per_second": 59.934,
2153
+ "step": 1005
2154
+ },
2155
+ {
2156
+ "epoch": 15.04,
2157
+ "learning_rate": 9.332820123195418e-07,
2158
+ "loss": 0.0588,
2159
+ "step": 1008
2160
+ },
2161
+ {
2162
+ "epoch": 15.09,
2163
+ "learning_rate": 8.480799030582187e-07,
2164
+ "loss": 0.0311,
2165
+ "step": 1011
2166
+ },
2167
+ {
2168
+ "epoch": 15.13,
2169
+ "learning_rate": 7.669221432120288e-07,
2170
+ "loss": 0.0655,
2171
+ "step": 1014
2172
+ },
2173
+ {
2174
+ "epoch": 15.18,
2175
+ "learning_rate": 6.898154106551702e-07,
2176
+ "loss": 0.0682,
2177
+ "step": 1017
2178
+ },
2179
+ {
2180
+ "epoch": 15.22,
2181
+ "learning_rate": 6.167660499326322e-07,
2182
+ "loss": 0.0736,
2183
+ "step": 1020
2184
+ },
2185
+ {
2186
+ "epoch": 15.27,
2187
+ "learning_rate": 5.477800717381687e-07,
2188
+ "loss": 0.0778,
2189
+ "step": 1023
2190
+ },
2191
+ {
2192
+ "epoch": 15.31,
2193
+ "learning_rate": 4.828631524197325e-07,
2194
+ "loss": 0.0917,
2195
+ "step": 1026
2196
+ },
2197
+ {
2198
+ "epoch": 15.36,
2199
+ "learning_rate": 4.2202063351238774e-07,
2200
+ "loss": 0.0468,
2201
+ "step": 1029
2202
+ },
2203
+ {
2204
+ "epoch": 15.4,
2205
+ "learning_rate": 3.652575212987952e-07,
2206
+ "loss": 0.0243,
2207
+ "step": 1032
2208
+ },
2209
+ {
2210
+ "epoch": 15.45,
2211
+ "learning_rate": 3.1257848639730803e-07,
2212
+ "loss": 0.0767,
2213
+ "step": 1035
2214
+ },
2215
+ {
2216
+ "epoch": 15.49,
2217
+ "learning_rate": 2.6398786337762936e-07,
2218
+ "loss": 0.0509,
2219
+ "step": 1038
2220
+ },
2221
+ {
2222
+ "epoch": 15.54,
2223
+ "learning_rate": 2.1948965040417547e-07,
2224
+ "loss": 0.0546,
2225
+ "step": 1041
2226
+ },
2227
+ {
2228
+ "epoch": 15.58,
2229
+ "learning_rate": 1.790875089070887e-07,
2230
+ "loss": 0.0451,
2231
+ "step": 1044
2232
+ },
2233
+ {
2234
+ "epoch": 15.63,
2235
+ "learning_rate": 1.427847632809509e-07,
2236
+ "loss": 0.0781,
2237
+ "step": 1047
2238
+ },
2239
+ {
2240
+ "epoch": 15.67,
2241
+ "learning_rate": 1.1058440061127439e-07,
2242
+ "loss": 0.0233,
2243
+ "step": 1050
2244
+ },
2245
+ {
2246
+ "epoch": 15.72,
2247
+ "learning_rate": 8.248907042868737e-08,
2248
+ "loss": 0.0393,
2249
+ "step": 1053
2250
+ },
2251
+ {
2252
+ "epoch": 15.76,
2253
+ "learning_rate": 5.850108449094727e-08,
2254
+ "loss": 0.0612,
2255
+ "step": 1056
2256
+ },
2257
+ {
2258
+ "epoch": 15.81,
2259
+ "learning_rate": 3.862241659269294e-08,
2260
+ "loss": 0.0476,
2261
+ "step": 1059
2262
+ },
2263
+ {
2264
+ "epoch": 15.85,
2265
+ "learning_rate": 2.2854702403068972e-08,
2266
+ "loss": 0.0283,
2267
+ "step": 1062
2268
+ },
2269
+ {
2270
+ "epoch": 15.9,
2271
+ "learning_rate": 1.1199239331122214e-08,
2272
+ "loss": 0.0482,
2273
+ "step": 1065
2274
+ },
2275
+ {
2276
+ "epoch": 15.94,
2277
+ "learning_rate": 3.6569864190427738e-09,
2278
+ "loss": 0.052,
2279
+ "step": 1068
2280
+ },
2281
+ {
2282
+ "epoch": 15.99,
2283
+ "learning_rate": 2.2856426326045565e-10,
2284
+ "loss": 0.0485,
2285
+ "step": 1071
2286
+ },
2287
+ {
2288
+ "epoch": 16.0,
2289
+ "eval_loss": 0.8556408882141113,
2290
+ "eval_matthews_correlation": 0.5076423377649488,
2291
+ "eval_runtime": 1.1842,
2292
+ "eval_samples_per_second": 880.772,
2293
+ "eval_steps_per_second": 55.734,
2294
+ "step": 1072
2295
+ },
2296
+ {
2297
+ "epoch": 16.0,
2298
+ "step": 1072,
2299
+ "total_flos": 2253110839083008.0,
2300
+ "train_loss": 0.1824943411264763,
2301
+ "train_runtime": 434.1785,
2302
+ "train_samples_per_second": 315.115,
2303
+ "train_steps_per_second": 2.469
2304
+ }
2305
+ ],
2306
+ "max_steps": 1072,
2307
+ "num_train_epochs": 16,
2308
+ "total_flos": 2253110839083008.0,
2309
+ "trial_name": null,
2310
+ "trial_params": null
2311
+ }