2022-02-05 01:08:47,419 ----------------------------------------------------------------------------------------------------
2022-02-05 01:08:47,461 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(32768, 768, padding_idx=1)
        (position_embeddings): Embedding(514, 768, padding_idx=1)
        (token_type_embeddings): Embedding(1, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (1): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (2): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (3): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (4): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (5): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (6): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (7): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (8): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (9): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (10): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
          (11): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
            )
            (intermediate): RobertaIntermediate(
              (dense): Linear(in_features=768, out_features=3072, bias=True)
            )
            (output): RobertaOutput(
              (dense): Linear(in_features=3072, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
          )
        )
      )
      (pooler): RobertaPooler(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (activation): Tanh()
      )
    )
  )
  (word_dropout): WordDropout(p=0.05)
  (locked_dropout): LockedDropout(p=0.5)
  (linear): Linear(in_features=768, out_features=18, bias=True)
  (beta): 1.0
  (weights): None
  (weight_tensor) None
)"
2022-02-05 01:08:47,466 ----------------------------------------------------------------------------------------------------
2022-02-05 01:08:47,466 Corpus: "Corpus: 126973 train + 7037 dev + 7090 test sentences"
2022-02-05 01:08:47,466 ----------------------------------------------------------------------------------------------------
2022-02-05 01:08:47,466 Parameters:
2022-02-05 01:08:47,466 - learning_rate: "5e-05"
2022-02-05 01:08:47,466 - mini_batch_size: "16"
2022-02-05 01:08:47,466 - patience: "3"
2022-02-05 01:08:47,466 - anneal_factor: "0.5"
2022-02-05 01:08:47,466 - max_epochs: "10"
2022-02-05 01:08:47,466 - shuffle: "True"
2022-02-05 01:08:47,466 - train_with_dev: "False"
2022-02-05 01:08:47,466 - batch_growth_annealing: "False"
2022-02-05 01:08:47,466 ----------------------------------------------------------------------------------------------------
2022-02-05 01:08:47,466 Model training base path: "resources/taggers/ner-dalembert-2ndtry"
2022-02-05 01:08:47,466 ----------------------------------------------------------------------------------------------------
2022-02-05 01:08:47,466 Device: cuda:0
2022-02-05 01:08:47,466 ----------------------------------------------------------------------------------------------------
2022-02-05 01:08:47,467 Embeddings storage mode: none
2022-02-05 01:08:47,469 ----------------------------------------------------------------------------------------------------
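
The learning-rate column below rises linearly from 5e-06 to the peak of 5e-05 over epoch 1 and then decays linearly to 0, i.e. a warmup fraction of 0.1 across 10 epochs. That trace matches Flair's transformer fine-tuning routine with a linear warmup scheduler rather than the classic anneal-on-plateau loop, which would also explain why "BAD EPOCHS" stays at 4 throughout; treat the scheduler choice as an assumption. A minimal sketch of the training call with the logged parameters:

from flair.trainers import ModelTrainer

trainer = ModelTrainer(tagger, corpus)
trainer.fine_tune(
    "resources/taggers/ner-dalembert-2ndtry",
    learning_rate=5e-05,             # peak rate, reached at the end of epoch 1
    mini_batch_size=16,
    max_epochs=10,
    embeddings_storage_mode="none",  # recompute embeddings each batch, saving memory
)
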
2022-02-05 01:15:08,771 epoch 1 - iter 793/7936 - loss 0.78007372 - samples/sec: 33.28 - lr: 0.000005
2022-02-05 01:22:45,940 epoch 1 - iter 1586/7936 - loss 0.41932043 - samples/sec: 27.76 - lr: 0.000010
2022-02-05 01:29:23,897 epoch 1 - iter 2379/7936 - loss 0.33514542 - samples/sec: 31.89 - lr: 0.000015
2022-02-05 01:35:24,915 epoch 1 - iter 3172/7936 - loss 0.30212998 - samples/sec: 35.15 - lr: 0.000020
2022-02-05 01:42:28,297 epoch 1 - iter 3965/7936 - loss 0.27341208 - samples/sec: 29.97 - lr: 0.000025
2022-02-05 01:49:23,543 epoch 1 - iter 4758/7936 - loss 0.25403588 - samples/sec: 30.56 - lr: 0.000030
2022-02-05 01:55:46,783 epoch 1 - iter 5551/7936 - loss 0.24241496 - samples/sec: 33.11 - lr: 0.000035
2022-02-05 02:01:45,654 epoch 1 - iter 6344/7936 - loss 0.23381719 - samples/sec: 35.36 - lr: 0.000040
2022-02-05 02:07:29,407 epoch 1 - iter 7137/7936 - loss 0.22586308 - samples/sec: 36.92 - lr: 0.000045
2022-02-05 02:13:54,603 epoch 1 - iter 7930/7936 - loss 0.21834611 - samples/sec: 32.94 - lr: 0.000050
2022-02-05 02:13:57,692 ----------------------------------------------------------------------------------------------------
2022-02-05 02:13:57,693 EPOCH 1 done: loss 0.2183 - lr 0.0000500
2022-02-05 02:16:47,190 DEV : loss 0.0355144739151001 - f1-score (micro avg) 0.8254
2022-02-05 02:16:47,244 BAD EPOCHS (no improvement): 4
2022-02-05 02:16:47,244 ----------------------------------------------------------------------------------------------------
2022-02-05 02:23:15,435 epoch 2 - iter 793/7936 - loss 0.14903310 - samples/sec: 32.69 - lr: 0.000049
2022-02-05 02:30:06,605 epoch 2 - iter 1586/7936 - loss 0.14777394 - samples/sec: 30.86 - lr: 0.000049
2022-02-05 02:36:48,570 epoch 2 - iter 2379/7936 - loss 0.14637300 - samples/sec: 31.57 - lr: 0.000048
2022-02-05 02:43:37,172 epoch 2 - iter 3172/7936 - loss 0.14491485 - samples/sec: 31.06 - lr: 0.000048
2022-02-05 02:50:13,040 epoch 2 - iter 3965/7936 - loss 0.14361996 - samples/sec: 32.06 - lr: 0.000047
2022-02-05 02:56:49,904 epoch 2 - iter 4758/7936 - loss 0.14232123 - samples/sec: 31.98 - lr: 0.000047
2022-02-05 03:03:34,383 epoch 2 - iter 5551/7936 - loss 0.14116820 - samples/sec: 31.38 - lr: 0.000046
2022-02-05 03:10:09,778 epoch 2 - iter 6344/7936 - loss 0.14001072 - samples/sec: 32.10 - lr: 0.000046
2022-02-05 03:16:43,847 epoch 2 - iter 7137/7936 - loss 0.13868572 - samples/sec: 32.20 - lr: 0.000045
2022-02-05 03:23:28,994 epoch 2 - iter 7930/7936 - loss 0.13731517 - samples/sec: 31.33 - lr: 0.000044
2022-02-05 03:23:31,622 ----------------------------------------------------------------------------------------------------
2022-02-05 03:23:31,623 EPOCH 2 done: loss 0.1373 - lr 0.0000444
2022-02-05 03:26:13,727 DEV : loss 0.015243684872984886 - f1-score (micro avg) 0.9132
2022-02-05 03:26:13,788 BAD EPOCHS (no improvement): 4
2022-02-05 03:26:13,806 ----------------------------------------------------------------------------------------------------
2022-02-05 03:32:57,765 epoch 3 - iter 793/7936 - loss 0.11924788 - samples/sec: 31.42 - lr: 0.000044
2022-02-05 03:39:33,229 epoch 3 - iter 1586/7936 - loss 0.11867811 - samples/sec: 32.09 - lr: 0.000043
2022-02-05 03:46:09,619 epoch 3 - iter 2379/7936 - loss 0.11819415 - samples/sec: 32.01 - lr: 0.000043
2022-02-05 03:52:49,510 epoch 3 - iter 3172/7936 - loss 0.11779082 - samples/sec: 31.74 - lr: 0.000042
2022-02-05 03:59:27,917 epoch 3 - iter 3965/7936 - loss 0.11691604 - samples/sec: 31.85 - lr: 0.000042
2022-02-05 04:06:01,365 epoch 3 - iter 4758/7936 - loss 0.11592267 - samples/sec: 32.26 - lr: 0.000041
2022-02-05 04:12:41,174 epoch 3 - iter 5551/7936 - loss 0.11480043 - samples/sec: 31.74 - lr: 0.000041
2022-02-05 04:19:14,243 epoch 3 - iter 6344/7936 - loss 0.11389582 - samples/sec: 32.29 - lr: 0.000040
2022-02-05 04:25:45,192 epoch 3 - iter 7137/7936 - loss 0.11289267 - samples/sec: 32.46 - lr: 0.000039
2022-02-05 04:32:26,310 epoch 3 - iter 7930/7936 - loss 0.11196899 - samples/sec: 31.64 - lr: 0.000039
2022-02-05 04:32:29,352 ----------------------------------------------------------------------------------------------------
2022-02-05 04:32:29,353 EPOCH 3 done: loss 0.1120 - lr 0.0000389
2022-02-05 04:35:09,639 DEV : loss 0.016585879027843475 - f1-score (micro avg) 0.9229
2022-02-05 04:35:09,698 BAD EPOCHS (no improvement): 4
2022-02-05 04:35:09,698 ----------------------------------------------------------------------------------------------------
2022-02-05 04:41:46,821 epoch 4 - iter 793/7936 - loss 0.09739851 - samples/sec: 31.96 - lr: 0.000038
2022-02-05 04:48:23,504 epoch 4 - iter 1586/7936 - loss 0.09750632 - samples/sec: 31.99 - lr: 0.000038
2022-02-05 04:55:05,833 epoch 4 - iter 2379/7936 - loss 0.09636659 - samples/sec: 31.54 - lr: 0.000037
2022-02-05 05:01:34,951 epoch 4 - iter 3172/7936 - loss 0.09583742 - samples/sec: 32.61 - lr: 0.000037
2022-02-05 05:08:07,163 epoch 4 - iter 3965/7936 - loss 0.09518243 - samples/sec: 32.36 - lr: 0.000036
2022-02-05 05:14:50,781 epoch 4 - iter 4758/7936 - loss 0.09444265 - samples/sec: 31.44 - lr: 0.000036
2022-02-05 05:21:24,983 epoch 4 - iter 5551/7936 - loss 0.09374740 - samples/sec: 32.19 - lr: 0.000035
2022-02-05 05:27:54,052 epoch 4 - iter 6344/7936 - loss 0.09321236 - samples/sec: 32.62 - lr: 0.000034
2022-02-05 05:34:32,228 epoch 4 - iter 7137/7936 - loss 0.09231997 - samples/sec: 31.87 - lr: 0.000034
2022-02-05 05:41:08,580 epoch 4 - iter 7930/7936 - loss 0.09147929 - samples/sec: 32.02 - lr: 0.000033
2022-02-05 05:41:11,479 ----------------------------------------------------------------------------------------------------
2022-02-05 05:41:11,479 EPOCH 4 done: loss 0.0915 - lr 0.0000333
2022-02-05 05:44:00,197 DEV : loss 0.016923826187849045 - f1-score (micro avg) 0.9213
2022-02-05 05:44:00,256 BAD EPOCHS (no improvement): 4
2022-02-05 05:44:00,270 ----------------------------------------------------------------------------------------------------
2022-02-05 05:50:27,537 epoch 5 - iter 793/7936 - loss 0.07986125 - samples/sec: 32.77 - lr: 0.000033
2022-02-05 05:56:56,203 epoch 5 - iter 1586/7936 - loss 0.08031745 - samples/sec: 32.65 - lr: 0.000032
2022-02-05 06:03:34,109 epoch 5 - iter 2379/7936 - loss 0.07984185 - samples/sec: 31.89 - lr: 0.000032
2022-02-05 06:10:03,550 epoch 5 - iter 3172/7936 - loss 0.07905074 - samples/sec: 32.59 - lr: 0.000031
2022-02-05 06:16:30,085 epoch 5 - iter 3965/7936 - loss 0.07843193 - samples/sec: 32.83 - lr: 0.000031
2022-02-05 06:23:10,671 epoch 5 - iter 4758/7936 - loss 0.07785540 - samples/sec: 31.68 - lr: 0.000030
2022-02-05 06:29:45,063 epoch 5 - iter 5551/7936 - loss 0.07709413 - samples/sec: 32.18 - lr: 0.000029
2022-02-05 06:36:23,513 epoch 5 - iter 6344/7936 - loss 0.07634510 - samples/sec: 31.85 - lr: 0.000029
2022-02-05 06:42:51,615 epoch 5 - iter 7137/7936 - loss 0.07566508 - samples/sec: 32.70 - lr: 0.000028
2022-02-05 06:49:23,409 epoch 5 - iter 7930/7936 - loss 0.07495508 - samples/sec: 32.39 - lr: 0.000028
2022-02-05 06:49:26,372 ----------------------------------------------------------------------------------------------------
2022-02-05 06:49:26,373 EPOCH 5 done: loss 0.0750 - lr 0.0000278
2022-02-05 06:52:15,459 DEV : loss 0.017464155331254005 - f1-score (micro avg) 0.9311
2022-02-05 06:52:15,518 BAD EPOCHS (no improvement): 4
2022-02-05 06:52:15,518 ----------------------------------------------------------------------------------------------------
2022-02-05 06:58:49,072 epoch 6 - iter 793/7936 - loss 0.06552824 - samples/sec: 32.25 - lr: 0.000027
2022-02-05 07:05:27,796 epoch 6 - iter 1586/7936 - loss 0.06569517 - samples/sec: 31.83 - lr: 0.000027
2022-02-05 07:11:58,162 epoch 6 - iter 2379/7936 - loss 0.06536467 - samples/sec: 32.51 - lr: 0.000026
2022-02-05 07:18:25,878 epoch 6 - iter 3172/7936 - loss 0.06467146 - samples/sec: 32.73 - lr: 0.000026
2022-02-05 07:25:10,562 epoch 6 - iter 3965/7936 - loss 0.06426965 - samples/sec: 31.36 - lr: 0.000025
2022-02-05 07:31:39,437 epoch 6 - iter 4758/7936 - loss 0.06371305 - samples/sec: 32.63 - lr: 0.000024
2022-02-05 07:38:08,323 epoch 6 - iter 5551/7936 - loss 0.06328229 - samples/sec: 32.63 - lr: 0.000024
2022-02-05 07:44:52,176 epoch 6 - iter 6344/7936 - loss 0.06272143 - samples/sec: 31.42 - lr: 0.000023
2022-02-05 07:51:20,507 epoch 6 - iter 7137/7936 - loss 0.06218937 - samples/sec: 32.68 - lr: 0.000023
2022-02-05 07:57:52,828 epoch 6 - iter 7930/7936 - loss 0.06175113 - samples/sec: 32.35 - lr: 0.000022
2022-02-05 07:57:55,686 ----------------------------------------------------------------------------------------------------
2022-02-05 07:57:55,687 EPOCH 6 done: loss 0.0617 - lr 0.0000222
2022-02-05 08:00:45,565 DEV : loss 0.01982131227850914 - f1-score (micro avg) 0.9358
2022-02-05 08:00:45,625 BAD EPOCHS (no improvement): 4
2022-02-05 08:00:45,644 ----------------------------------------------------------------------------------------------------
2022-02-05 08:07:26,967 epoch 7 - iter 793/7936 - loss 0.05520420 - samples/sec: 31.62 - lr: 0.000022
2022-02-05 08:13:58,782 epoch 7 - iter 1586/7936 - loss 0.05522964 - samples/sec: 32.39 - lr: 0.000021
2022-02-05 08:20:32,705 epoch 7 - iter 2379/7936 - loss 0.05482898 - samples/sec: 32.21 - lr: 0.000021
2022-02-05 08:27:14,353 epoch 7 - iter 3172/7936 - loss 0.05433105 - samples/sec: 31.59 - lr: 0.000020
2022-02-05 08:33:45,236 epoch 7 - iter 3965/7936 - loss 0.05397125 - samples/sec: 32.47 - lr: 0.000019
2022-02-05 08:40:14,072 epoch 7 - iter 4758/7936 - loss 0.05348281 - samples/sec: 32.64 - lr: 0.000019
2022-02-05 08:46:52,674 epoch 7 - iter 5551/7936 - loss 0.05316673 - samples/sec: 31.84 - lr: 0.000018
2022-02-05 08:53:20,653 epoch 7 - iter 6344/7936 - loss 0.05275831 - samples/sec: 32.71 - lr: 0.000018
2022-02-05 08:59:52,741 epoch 7 - iter 7137/7936 - loss 0.05230036 - samples/sec: 32.37 - lr: 0.000017
2022-02-05 09:06:38,983 epoch 7 - iter 7930/7936 - loss 0.05190552 - samples/sec: 31.24 - lr: 0.000017
2022-02-05 09:06:41,639 ----------------------------------------------------------------------------------------------------
2022-02-05 09:06:41,639 EPOCH 7 done: loss 0.0519 - lr 0.0000167
2022-02-05 09:09:20,864 DEV : loss 0.02467426098883152 - f1-score (micro avg) 0.9355
2022-02-05 09:09:20,924 BAD EPOCHS (no improvement): 4
2022-02-05 09:09:20,939 ----------------------------------------------------------------------------------------------------
2022-02-05 09:16:05,134 epoch 8 - iter 793/7936 - loss 0.04726178 - samples/sec: 31.40 - lr: 0.000016
2022-02-05 09:22:33,870 epoch 8 - iter 1586/7936 - loss 0.04719666 - samples/sec: 32.64 - lr: 0.000016
2022-02-05 09:29:02,929 epoch 8 - iter 2379/7936 - loss 0.04663752 - samples/sec: 32.62 - lr: 0.000015
2022-02-05 09:35:42,369 epoch 8 - iter 3172/7936 - loss 0.04634901 - samples/sec: 31.77 - lr: 0.000014
2022-02-05 09:42:14,843 epoch 8 - iter 3965/7936 - loss 0.04602895 - samples/sec: 32.33 - lr: 0.000014
2022-02-05 09:48:48,062 epoch 8 - iter 4758/7936 - loss 0.04582764 - samples/sec: 32.27 - lr: 0.000013
2022-02-05 09:55:28,863 epoch 8 - iter 5551/7936 - loss 0.04566599 - samples/sec: 31.66 - lr: 0.000013
2022-02-05 10:01:52,699 epoch 8 - iter 6344/7936 - loss 0.04545939 - samples/sec: 33.06 - lr: 0.000012
2022-02-05 10:08:33,137 epoch 8 - iter 7137/7936 - loss 0.04526206 - samples/sec: 31.69 - lr: 0.000012
2022-02-05 10:15:07,241 epoch 8 - iter 7930/7936 - loss 0.04503385 - samples/sec: 32.20 - lr: 0.000011
2022-02-05 10:15:10,600 ----------------------------------------------------------------------------------------------------
2022-02-05 10:15:10,600 EPOCH 8 done: loss 0.0450 - lr 0.0000111
2022-02-05 10:18:00,280 DEV : loss 0.02364770695567131 - f1-score (micro avg) 0.9371
2022-02-05 10:18:00,339 BAD EPOCHS (no improvement): 4
2022-02-05 10:18:00,358 ----------------------------------------------------------------------------------------------------
2022-02-05 10:24:31,011 epoch 9 - iter 793/7936 - loss 0.04122325 - samples/sec: 32.48 - lr: 0.000011
2022-02-05 10:31:00,279 epoch 9 - iter 1586/7936 - loss 0.04130931 - samples/sec: 32.60 - lr: 0.000010
2022-02-05 10:37:40,369 epoch 9 - iter 2379/7936 - loss 0.04131112 - samples/sec: 31.72 - lr: 0.000009
2022-02-05 10:44:11,067 epoch 9 - iter 3172/7936 - loss 0.04141124 - samples/sec: 32.48 - lr: 0.000009
2022-02-05 10:50:41,270 epoch 9 - iter 3965/7936 - loss 0.04120608 - samples/sec: 32.52 - lr: 0.000008
2022-02-05 10:57:24,718 epoch 9 - iter 4758/7936 - loss 0.04108655 - samples/sec: 31.45 - lr: 0.000008
2022-02-05 11:04:00,581 epoch 9 - iter 5551/7936 - loss 0.04093370 - samples/sec: 32.06 - lr: 0.000007
2022-02-05 11:10:31,042 epoch 9 - iter 6344/7936 - loss 0.04078404 - samples/sec: 32.50 - lr: 0.000007
2022-02-05 11:17:13,751 epoch 9 - iter 7137/7936 - loss 0.04061073 - samples/sec: 31.51 - lr: 0.000006
2022-02-05 11:23:44,231 epoch 9 - iter 7930/7936 - loss 0.04050638 - samples/sec: 32.50 - lr: 0.000006
2022-02-05 11:23:47,941 ----------------------------------------------------------------------------------------------------
2022-02-05 11:23:47,942 EPOCH 9 done: loss 0.0405 - lr 0.0000056
2022-02-05 11:26:37,114 DEV : loss 0.026182951405644417 - f1-score (micro avg) 0.9361
2022-02-05 11:26:37,173 BAD EPOCHS (no improvement): 4
2022-02-05 11:26:37,186 ----------------------------------------------------------------------------------------------------
2022-02-05 11:33:05,778 epoch 10 - iter 793/7936 - loss 0.03876526 - samples/sec: 32.66 - lr: 0.000005
2022-02-05 11:39:45,501 epoch 10 - iter 1586/7936 - loss 0.03871561 - samples/sec: 31.75 - lr: 0.000004
2022-02-05 11:46:18,242 epoch 10 - iter 2379/7936 - loss 0.03842790 - samples/sec: 32.31 - lr: 0.000004
2022-02-05 11:52:48,370 epoch 10 - iter 3172/7936 - loss 0.03820246 - samples/sec: 32.53 - lr: 0.000003
2022-02-05 11:59:28,420 epoch 10 - iter 3965/7936 - loss 0.03807900 - samples/sec: 31.72 - lr: 0.000003
2022-02-05 12:05:57,882 epoch 10 - iter 4758/7936 - loss 0.03798954 - samples/sec: 32.58 - lr: 0.000002
2022-02-05 12:12:25,766 epoch 10 - iter 5551/7936 - loss 0.03803371 - samples/sec: 32.72 - lr: 0.000002
2022-02-05 12:19:03,411 epoch 10 - iter 6344/7936 - loss 0.03805844 - samples/sec: 31.91 - lr: 0.000001
2022-02-05 12:25:27,539 epoch 10 - iter 7137/7936 - loss 0.03799490 - samples/sec: 33.04 - lr: 0.000001
2022-02-05 12:31:55,442 epoch 10 - iter 7930/7936 - loss 0.03798541 - samples/sec: 32.71 - lr: 0.000000
2022-02-05 12:31:58,461 ----------------------------------------------------------------------------------------------------
2022-02-05 12:31:58,462 EPOCH 10 done: loss 0.0380 - lr 0.0000000
2022-02-05 12:34:45,700 DEV : loss 0.027400659397244453 - f1-score (micro avg) 0.9368
2022-02-05 12:34:45,760 BAD EPOCHS (no improvement): 4
2022-02-05 12:34:46,755 ----------------------------------------------------------------------------------------------------
2022-02-05 12:34:46,757 Testing using last state of model ...
2022-02-05 12:37:34,421 0.9329 0.9323 0.9326 0.8893
2022-02-05 12:37:34,422
Results:
- F-score (micro) 0.9326
- F-score (macro) 0.9111
- Accuracy 0.8893
By class:
              precision    recall  f1-score   support

        pers     0.9355    0.9279    0.9317      2734
         loc     0.9242    0.9335    0.9288      1384
      amount     0.9800    0.9800    0.9800       250
        time     0.9456    0.9576    0.9516       236
        func     0.9333    0.9000    0.9164       140
         org     0.8148    0.8980    0.8544        49
        prod     0.8621    0.9259    0.8929        27
       event     0.8333    0.8333    0.8333        12

   micro avg     0.9329    0.9323    0.9326      4832
   macro avg     0.9036    0.9195    0.9111      4832
weighted avg     0.9331    0.9323    0.9327      4832
 samples avg     0.8893    0.8893    0.8893      4832
2022-02-05 12:37:34,422 ----------------------------------------------------------------------------------------------------
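
The trained tagger can then be loaded for inference. A small usage sketch: "final-model.pt" is Flair's default filename for the last model state, which is what was evaluated above ("Testing using last state of model"), and the French example sentence is illustrative only:

from flair.data import Sentence
from flair.models import SequenceTagger

tagger = SequenceTagger.load("resources/taggers/ner-dalembert-2ndtry/final-model.pt")

sentence = Sentence("Denis Diderot est né à Langres en 1713.")
tagger.predict(sentence)

# print recognized spans with their NER labels (pers, loc, time, ...)
for entity in sentence.get_spans("ner"):
    print(entity)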