w11wo committed on
Commit
aea591b
1 Parent(s): aba5c51

End of training

Browse files
README.md CHANGED
@@ -20,11 +20,11 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  This model is a fine-tuned version of [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.7134
24
- - Accuracy: 0.8843
25
- - F1: 0.7634
26
- - Precision: 0.7812
27
- - Recall: 0.7463
28
 
29
  ## Model description
30
 
 
20
 
21
  This model is a fine-tuned version of [xlm-roberta-base](https://huggingface.co/xlm-roberta-base) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.4359
24
+ - Accuracy: 0.8513
25
+ - F1: 0.7386
26
+ - Precision: 0.6570
27
+ - Recall: 0.8433
28
 
29
  ## Model description
30
 
all_results.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 13.0,
3
+ "eval_accuracy": 0.8513011152416357,
4
+ "eval_f1": 0.7385620915032679,
5
+ "eval_loss": 0.43591225147247314,
6
+ "eval_precision": 0.6569767441860465,
7
+ "eval_recall": 0.8432835820895522,
8
+ "eval_runtime": 2.1114,
9
+ "eval_samples": 268,
10
+ "eval_samples_per_second": 254.812,
11
+ "eval_steps_per_second": 4.263,
12
+ "train_loss": 0.2951739639471221,
13
+ "train_runtime": 432.0494,
14
+ "train_samples": 1878,
15
+ "train_samples_per_second": 434.673,
16
+ "train_steps_per_second": 13.656
17
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 13.0,
3
+ "eval_accuracy": 0.8513011152416357,
4
+ "eval_f1": 0.7385620915032679,
5
+ "eval_loss": 0.43591225147247314,
6
+ "eval_precision": 0.6569767441860465,
7
+ "eval_recall": 0.8432835820895522,
8
+ "eval_runtime": 2.1114,
9
+ "eval_samples": 268,
10
+ "eval_samples_per_second": 254.812,
11
+ "eval_steps_per_second": 4.263
12
+ }
predict_results.txt ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ index prediction
2
+ 0 1
3
+ 1 0
4
+ 2 0
5
+ 3 0
6
+ 4 1
7
+ 5 0
8
+ 6 1
9
+ 7 1
10
+ 8 0
11
+ 9 0
12
+ 10 0
13
+ 11 0
14
+ 12 1
15
+ 13 0
16
+ 14 0
17
+ 15 1
18
+ 16 1
19
+ 17 0
20
+ 18 1
21
+ 19 0
22
+ 20 0
23
+ 21 0
24
+ 22 0
25
+ 23 1
26
+ 24 0
27
+ 25 0
28
+ 26 1
29
+ 27 1
30
+ 28 1
31
+ 29 0
32
+ 30 0
33
+ 31 1
34
+ 32 1
35
+ 33 1
36
+ 34 0
37
+ 35 0
38
+ 36 0
39
+ 37 0
40
+ 38 0
41
+ 39 0
42
+ 40 0
43
+ 41 0
44
+ 42 0
45
+ 43 0
46
+ 44 0
47
+ 45 0
48
+ 46 0
49
+ 47 0
50
+ 48 0
51
+ 49 0
52
+ 50 1
53
+ 51 0
54
+ 52 0
55
+ 53 0
56
+ 54 0
57
+ 55 0
58
+ 56 0
59
+ 57 0
60
+ 58 0
61
+ 59 0
62
+ 60 0
63
+ 61 1
64
+ 62 0
65
+ 63 1
66
+ 64 0
67
+ 65 0
68
+ 66 0
69
+ 67 0
70
+ 68 0
71
+ 69 0
72
+ 70 0
73
+ 71 1
74
+ 72 1
75
+ 73 1
76
+ 74 0
77
+ 75 0
78
+ 76 1
79
+ 77 0
80
+ 78 0
81
+ 79 0
82
+ 80 0
83
+ 81 0
84
+ 82 0
85
+ 83 1
86
+ 84 1
87
+ 85 0
88
+ 86 1
89
+ 87 0
90
+ 88 1
91
+ 89 0
92
+ 90 0
93
+ 91 0
94
+ 92 1
95
+ 93 0
96
+ 94 1
97
+ 95 0
98
+ 96 1
99
+ 97 0
100
+ 98 0
101
+ 99 1
102
+ 100 0
103
+ 101 0
104
+ 102 0
105
+ 103 0
106
+ 104 0
107
+ 105 1
108
+ 106 0
109
+ 107 0
110
+ 108 1
111
+ 109 0
112
+ 110 0
113
+ 111 1
114
+ 112 0
115
+ 113 0
116
+ 114 1
117
+ 115 0
118
+ 116 0
119
+ 117 0
120
+ 118 0
121
+ 119 1
122
+ 120 0
123
+ 121 0
124
+ 122 0
125
+ 123 0
126
+ 124 1
127
+ 125 0
128
+ 126 0
129
+ 127 0
130
+ 128 0
131
+ 129 0
132
+ 130 1
133
+ 131 0
134
+ 132 0
135
+ 133 0
136
+ 134 0
137
+ 135 0
138
+ 136 1
139
+ 137 0
140
+ 138 0
141
+ 139 0
142
+ 140 1
143
+ 141 0
144
+ 142 1
145
+ 143 0
146
+ 144 1
147
+ 145 0
148
+ 146 0
149
+ 147 1
150
+ 148 1
151
+ 149 0
152
+ 150 1
153
+ 151 0
154
+ 152 0
155
+ 153 0
156
+ 154 0
157
+ 155 0
158
+ 156 0
159
+ 157 0
160
+ 158 1
161
+ 159 0
162
+ 160 0
163
+ 161 0
164
+ 162 0
165
+ 163 1
166
+ 164 0
167
+ 165 1
168
+ 166 0
169
+ 167 1
170
+ 168 0
171
+ 169 0
172
+ 170 0
173
+ 171 1
174
+ 172 0
175
+ 173 0
176
+ 174 1
177
+ 175 0
178
+ 176 0
179
+ 177 1
180
+ 178 0
181
+ 179 1
182
+ 180 1
183
+ 181 0
184
+ 182 1
185
+ 183 0
186
+ 184 0
187
+ 185 1
188
+ 186 1
189
+ 187 1
190
+ 188 0
191
+ 189 0
192
+ 190 0
193
+ 191 0
194
+ 192 1
195
+ 193 0
196
+ 194 0
197
+ 195 0
198
+ 196 0
199
+ 197 0
200
+ 198 1
201
+ 199 0
202
+ 200 0
203
+ 201 0
204
+ 202 1
205
+ 203 1
206
+ 204 0
207
+ 205 1
208
+ 206 1
209
+ 207 0
210
+ 208 0
211
+ 209 0
212
+ 210 0
213
+ 211 0
214
+ 212 1
215
+ 213 0
216
+ 214 0
217
+ 215 0
218
+ 216 1
219
+ 217 0
220
+ 218 1
221
+ 219 0
222
+ 220 0
223
+ 221 0
224
+ 222 0
225
+ 223 1
226
+ 224 0
227
+ 225 0
228
+ 226 1
229
+ 227 0
230
+ 228 1
231
+ 229 0
232
+ 230 0
233
+ 231 1
234
+ 232 1
235
+ 233 0
236
+ 234 1
237
+ 235 0
238
+ 236 0
239
+ 237 1
240
+ 238 0
241
+ 239 0
242
+ 240 1
243
+ 241 0
244
+ 242 0
245
+ 243 0
246
+ 244 0
247
+ 245 0
248
+ 246 0
249
+ 247 0
250
+ 248 0
251
+ 249 1
252
+ 250 0
253
+ 251 0
254
+ 252 0
255
+ 253 0
256
+ 254 1
257
+ 255 0
258
+ 256 0
259
+ 257 1
260
+ 258 0
261
+ 259 1
262
+ 260 1
263
+ 261 0
264
+ 262 0
265
+ 263 0
266
+ 264 0
267
+ 265 1
268
+ 266 1
269
+ 267 0
270
+ 268 0
271
+ 269 0
272
+ 270 0
273
+ 271 1
274
+ 272 1
275
+ 273 0
276
+ 274 0
277
+ 275 0
278
+ 276 0
279
+ 277 0
280
+ 278 1
281
+ 279 1
282
+ 280 0
283
+ 281 1
284
+ 282 0
285
+ 283 0
286
+ 284 0
287
+ 285 0
288
+ 286 0
289
+ 287 0
290
+ 288 0
291
+ 289 0
292
+ 290 0
293
+ 291 0
294
+ 292 1
295
+ 293 0
296
+ 294 1
297
+ 295 0
298
+ 296 0
299
+ 297 1
300
+ 298 1
301
+ 299 1
302
+ 300 0
303
+ 301 1
304
+ 302 0
305
+ 303 1
306
+ 304 1
307
+ 305 0
308
+ 306 1
309
+ 307 0
310
+ 308 0
311
+ 309 0
312
+ 310 0
313
+ 311 0
314
+ 312 0
315
+ 313 1
316
+ 314 0
317
+ 315 0
318
+ 316 0
319
+ 317 0
320
+ 318 1
321
+ 319 0
322
+ 320 0
323
+ 321 1
324
+ 322 0
325
+ 323 0
326
+ 324 1
327
+ 325 1
328
+ 326 1
329
+ 327 0
330
+ 328 0
331
+ 329 0
332
+ 330 0
333
+ 331 1
334
+ 332 0
335
+ 333 0
336
+ 334 0
337
+ 335 0
338
+ 336 0
339
+ 337 0
340
+ 338 1
341
+ 339 0
342
+ 340 1
343
+ 341 1
344
+ 342 0
345
+ 343 0
346
+ 344 0
347
+ 345 0
348
+ 346 1
349
+ 347 0
350
+ 348 1
351
+ 349 0
352
+ 350 0
353
+ 351 0
354
+ 352 0
355
+ 353 1
356
+ 354 0
357
+ 355 0
358
+ 356 0
359
+ 357 0
360
+ 358 1
361
+ 359 1
362
+ 360 1
363
+ 361 0
364
+ 362 1
365
+ 363 1
366
+ 364 0
367
+ 365 0
368
+ 366 1
369
+ 367 0
370
+ 368 1
371
+ 369 0
372
+ 370 0
373
+ 371 1
374
+ 372 0
375
+ 373 1
376
+ 374 0
377
+ 375 0
378
+ 376 0
379
+ 377 0
380
+ 378 1
381
+ 379 0
382
+ 380 1
383
+ 381 0
384
+ 382 1
385
+ 383 0
386
+ 384 1
387
+ 385 0
388
+ 386 0
389
+ 387 1
390
+ 388 1
391
+ 389 0
392
+ 390 1
393
+ 391 0
394
+ 392 1
395
+ 393 0
396
+ 394 1
397
+ 395 0
398
+ 396 0
399
+ 397 0
400
+ 398 0
401
+ 399 0
402
+ 400 1
403
+ 401 0
404
+ 402 0
405
+ 403 1
406
+ 404 0
407
+ 405 1
408
+ 406 1
409
+ 407 0
410
+ 408 0
411
+ 409 0
412
+ 410 0
413
+ 411 1
414
+ 412 0
415
+ 413 0
416
+ 414 0
417
+ 415 0
418
+ 416 0
419
+ 417 1
420
+ 418 0
421
+ 419 1
422
+ 420 1
423
+ 421 0
424
+ 422 1
425
+ 423 1
426
+ 424 1
427
+ 425 0
428
+ 426 1
429
+ 427 0
430
+ 428 0
431
+ 429 0
432
+ 430 0
433
+ 431 0
434
+ 432 0
435
+ 433 1
436
+ 434 1
437
+ 435 0
438
+ 436 1
439
+ 437 0
440
+ 438 0
441
+ 439 0
442
+ 440 0
443
+ 441 1
444
+ 442 0
445
+ 443 0
446
+ 444 0
447
+ 445 0
448
+ 446 0
449
+ 447 0
450
+ 448 1
451
+ 449 0
452
+ 450 0
453
+ 451 1
454
+ 452 0
455
+ 453 0
456
+ 454 0
457
+ 455 0
458
+ 456 1
459
+ 457 0
460
+ 458 0
461
+ 459 0
462
+ 460 0
463
+ 461 0
464
+ 462 0
465
+ 463 1
466
+ 464 1
467
+ 465 0
468
+ 466 1
469
+ 467 1
470
+ 468 0
471
+ 469 0
472
+ 470 0
473
+ 471 1
474
+ 472 0
475
+ 473 0
476
+ 474 0
477
+ 475 1
478
+ 476 0
479
+ 477 0
480
+ 478 0
481
+ 479 0
482
+ 480 1
483
+ 481 1
484
+ 482 0
485
+ 483 0
486
+ 484 0
487
+ 485 0
488
+ 486 1
489
+ 487 1
490
+ 488 1
491
+ 489 0
492
+ 490 0
493
+ 491 0
494
+ 492 0
495
+ 493 1
496
+ 494 1
497
+ 495 1
498
+ 496 1
499
+ 497 0
500
+ 498 0
501
+ 499 0
502
+ 500 0
503
+ 501 0
504
+ 502 0
505
+ 503 0
506
+ 504 0
507
+ 505 0
508
+ 506 0
509
+ 507 0
510
+ 508 0
511
+ 509 0
512
+ 510 0
513
+ 511 0
514
+ 512 0
515
+ 513 0
516
+ 514 0
517
+ 515 0
518
+ 516 1
519
+ 517 1
520
+ 518 0
521
+ 519 0
522
+ 520 0
523
+ 521 1
524
+ 522 1
525
+ 523 0
526
+ 524 0
527
+ 525 0
528
+ 526 0
529
+ 527 1
530
+ 528 0
531
+ 529 1
532
+ 530 1
533
+ 531 0
534
+ 532 1
535
+ 533 1
536
+ 534 0
537
+ 535 1
538
+ 536 1
539
+ 537 1
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 13.0,
3
+ "train_loss": 0.2951739639471221,
4
+ "train_runtime": 432.0494,
5
+ "train_samples": 1878,
6
+ "train_samples_per_second": 434.673,
7
+ "train_steps_per_second": 13.656
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7894736842105264,
3
+ "best_model_checkpoint": "outputs/xlm-roberta-base-twitter-indonesia-sarcastic/checkpoint-590",
4
+ "epoch": 13.0,
5
+ "eval_steps": 500,
6
+ "global_step": 767,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "learning_rate": 9.997697221833061e-06,
14
+ "loss": 0.5641,
15
+ "step": 59
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_accuracy": 0.75,
20
+ "eval_f1": 0.0,
21
+ "eval_loss": 0.5259643793106079,
22
+ "eval_precision": 0.0,
23
+ "eval_recall": 0.0,
24
+ "eval_runtime": 1.0571,
25
+ "eval_samples_per_second": 253.522,
26
+ "eval_steps_per_second": 4.73,
27
+ "step": 59
28
+ },
29
+ {
30
+ "epoch": 2.0,
31
+ "learning_rate": 9.990465155165683e-06,
32
+ "loss": 0.5317,
33
+ "step": 118
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_accuracy": 0.75,
38
+ "eval_f1": 0.0,
39
+ "eval_loss": 0.5029988288879395,
40
+ "eval_precision": 0.0,
41
+ "eval_recall": 0.0,
42
+ "eval_runtime": 1.053,
43
+ "eval_samples_per_second": 254.519,
44
+ "eval_steps_per_second": 4.748,
45
+ "step": 118
46
+ },
47
+ {
48
+ "epoch": 3.0,
49
+ "learning_rate": 9.978555124138569e-06,
50
+ "loss": 0.4995,
51
+ "step": 177
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.75,
56
+ "eval_f1": 0.0,
57
+ "eval_loss": 0.4655507504940033,
58
+ "eval_precision": 0.0,
59
+ "eval_recall": 0.0,
60
+ "eval_runtime": 1.0523,
61
+ "eval_samples_per_second": 254.669,
62
+ "eval_steps_per_second": 4.751,
63
+ "step": 177
64
+ },
65
+ {
66
+ "epoch": 4.0,
67
+ "learning_rate": 9.961568226140335e-06,
68
+ "loss": 0.4599,
69
+ "step": 236
70
+ },
71
+ {
72
+ "epoch": 4.0,
73
+ "eval_accuracy": 0.7686567164179104,
74
+ "eval_f1": 0.6025641025641025,
75
+ "eval_loss": 0.4502911865711212,
76
+ "eval_precision": 0.5280898876404494,
77
+ "eval_recall": 0.7014925373134329,
78
+ "eval_runtime": 1.057,
79
+ "eval_samples_per_second": 253.552,
80
+ "eval_steps_per_second": 4.73,
81
+ "step": 236
82
+ },
83
+ {
84
+ "epoch": 5.0,
85
+ "learning_rate": 9.93968485932029e-06,
86
+ "loss": 0.4082,
87
+ "step": 295
88
+ },
89
+ {
90
+ "epoch": 5.0,
91
+ "eval_accuracy": 0.8470149253731343,
92
+ "eval_f1": 0.6434782608695653,
93
+ "eval_loss": 0.378521591424942,
94
+ "eval_precision": 0.7708333333333334,
95
+ "eval_recall": 0.5522388059701493,
96
+ "eval_runtime": 1.0544,
97
+ "eval_samples_per_second": 254.183,
98
+ "eval_steps_per_second": 4.742,
99
+ "step": 295
100
+ },
101
+ {
102
+ "epoch": 6.0,
103
+ "learning_rate": 9.912926619919478e-06,
104
+ "loss": 0.3274,
105
+ "step": 354
106
+ },
107
+ {
108
+ "epoch": 6.0,
109
+ "eval_accuracy": 0.8619402985074627,
110
+ "eval_f1": 0.6991869918699187,
111
+ "eval_loss": 0.3604646623134613,
112
+ "eval_precision": 0.7678571428571429,
113
+ "eval_recall": 0.6417910447761194,
114
+ "eval_runtime": 1.0555,
115
+ "eval_samples_per_second": 253.908,
116
+ "eval_steps_per_second": 4.737,
117
+ "step": 354
118
+ },
119
+ {
120
+ "epoch": 7.0,
121
+ "learning_rate": 9.881895849126432e-06,
122
+ "loss": 0.2621,
123
+ "step": 413
124
+ },
125
+ {
126
+ "epoch": 7.0,
127
+ "eval_accuracy": 0.8619402985074627,
128
+ "eval_f1": 0.6837606837606838,
129
+ "eval_loss": 0.3764938712120056,
130
+ "eval_precision": 0.8,
131
+ "eval_recall": 0.5970149253731343,
132
+ "eval_runtime": 1.0562,
133
+ "eval_samples_per_second": 253.732,
134
+ "eval_steps_per_second": 4.734,
135
+ "step": 413
136
+ },
137
+ {
138
+ "epoch": 8.0,
139
+ "learning_rate": 9.845553233485602e-06,
140
+ "loss": 0.2332,
141
+ "step": 472
142
+ },
143
+ {
144
+ "epoch": 8.0,
145
+ "eval_accuracy": 0.8768656716417911,
146
+ "eval_f1": 0.759124087591241,
147
+ "eval_loss": 0.34082189202308655,
148
+ "eval_precision": 0.7428571428571429,
149
+ "eval_recall": 0.7761194029850746,
150
+ "eval_runtime": 1.0525,
151
+ "eval_samples_per_second": 254.642,
152
+ "eval_steps_per_second": 4.751,
153
+ "step": 472
154
+ },
155
+ {
156
+ "epoch": 9.0,
157
+ "learning_rate": 9.804428641814051e-06,
158
+ "loss": 0.1579,
159
+ "step": 531
160
+ },
161
+ {
162
+ "epoch": 9.0,
163
+ "eval_accuracy": 0.8731343283582089,
164
+ "eval_f1": 0.7213114754098361,
165
+ "eval_loss": 0.43820273876190186,
166
+ "eval_precision": 0.8,
167
+ "eval_recall": 0.6567164179104478,
168
+ "eval_runtime": 1.055,
169
+ "eval_samples_per_second": 254.04,
170
+ "eval_steps_per_second": 4.74,
171
+ "step": 531
172
+ },
173
+ {
174
+ "epoch": 10.0,
175
+ "learning_rate": 9.75856265911873e-06,
176
+ "loss": 0.1467,
177
+ "step": 590
178
+ },
179
+ {
180
+ "epoch": 10.0,
181
+ "eval_accuracy": 0.8805970149253731,
182
+ "eval_f1": 0.7894736842105264,
183
+ "eval_loss": 0.38548171520233154,
184
+ "eval_precision": 0.7058823529411765,
185
+ "eval_recall": 0.8955223880597015,
186
+ "eval_runtime": 1.053,
187
+ "eval_samples_per_second": 254.501,
188
+ "eval_steps_per_second": 4.748,
189
+ "step": 590
190
+ },
191
+ {
192
+ "epoch": 11.0,
193
+ "learning_rate": 9.708000549587091e-06,
194
+ "loss": 0.098,
195
+ "step": 649
196
+ },
197
+ {
198
+ "epoch": 11.0,
199
+ "eval_accuracy": 0.8805970149253731,
200
+ "eval_f1": 0.7500000000000001,
201
+ "eval_loss": 0.46932676434516907,
202
+ "eval_precision": 0.7868852459016393,
203
+ "eval_recall": 0.7164179104477612,
204
+ "eval_runtime": 1.058,
205
+ "eval_samples_per_second": 253.305,
206
+ "eval_steps_per_second": 4.726,
207
+ "step": 649
208
+ },
209
+ {
210
+ "epoch": 12.0,
211
+ "learning_rate": 9.653766360237254e-06,
212
+ "loss": 0.0929,
213
+ "step": 708
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "eval_accuracy": 0.8805970149253731,
218
+ "eval_f1": 0.7333333333333334,
219
+ "eval_loss": 0.6206381916999817,
220
+ "eval_precision": 0.8301886792452831,
221
+ "eval_recall": 0.6567164179104478,
222
+ "eval_runtime": 1.0526,
223
+ "eval_samples_per_second": 254.603,
224
+ "eval_steps_per_second": 4.75,
225
+ "step": 708
226
+ },
227
+ {
228
+ "epoch": 13.0,
229
+ "learning_rate": 9.594043625628571e-06,
230
+ "loss": 0.0555,
231
+ "step": 767
232
+ },
233
+ {
234
+ "epoch": 13.0,
235
+ "eval_accuracy": 0.8843283582089553,
236
+ "eval_f1": 0.7633587786259542,
237
+ "eval_loss": 0.7134280800819397,
238
+ "eval_precision": 0.78125,
239
+ "eval_recall": 0.746268656716418,
240
+ "eval_runtime": 1.054,
241
+ "eval_samples_per_second": 254.279,
242
+ "eval_steps_per_second": 4.744,
243
+ "step": 767
244
+ },
245
+ {
246
+ "epoch": 13.0,
247
+ "step": 767,
248
+ "total_flos": 1605898326389760.0,
249
+ "train_loss": 0.2951739639471221,
250
+ "train_runtime": 432.0494,
251
+ "train_samples_per_second": 434.673,
252
+ "train_steps_per_second": 13.656
253
+ }
254
+ ],
255
+ "logging_steps": 500,
256
+ "max_steps": 5900,
257
+ "num_input_tokens_seen": 0,
258
+ "num_train_epochs": 100,
259
+ "save_steps": 500,
260
+ "total_flos": 1605898326389760.0,
261
+ "train_batch_size": 32,
262
+ "trial_name": null,
263
+ "trial_params": null
264
+ }