Upload 6 files

Browse files

Files changed (6) hide show

dev.tsv +0 -0
final-model.pt +3 -0
loss.tsv +11 -0
test.tsv +0 -0
training.log +538 -0
weights.txt +0 -0

dev.tsv ADDED Viewed

The diff for this file is too large to render. See raw diff

final-model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ce184125031081cef1c7b103a2731875c894080ae0b604bc5b781e87a7a62d0
+size 442756141

loss.tsv ADDED Viewed

	@@ -0,0 +1,11 @@

+EPOCH	TIMESTAMP	BAD_EPOCHS	LEARNING_RATE	TRAIN_LOSS	DEV_LOSS	DEV_PRECISION	DEV_RECALL	DEV_F1	DEV_ACCURACY
+1	12:56:18	4	0.0000	3.8419383407730647	3.509683847427368	0.3053	0.3053	0.3053	0.3053
+2	12:59:20	4	0.0000	3.2227634368718494	2.775869846343994	0.6141	0.6141	0.6141	0.6141
+3	13:02:23	4	0.0000	2.7700508728423903	2.410931348800659	0.819	0.819	0.819	0.819
+4	13:05:27	4	0.0000	2.5123233380738026	2.1908302307128906	0.8605	0.8605	0.8605	0.8605
+5	13:08:30	4	0.0000	2.350920672660358	2.0516607761383057	0.8737	0.8737	0.8737	0.8737
+6	13:11:34	4	0.0000	2.2365647102395845	1.9612011909484863	0.884	0.884	0.884	0.884
+7	13:14:37	4	0.0000	2.1661910551931784	1.8981177806854248	0.9008	0.9008	0.9008	0.9008
+8	13:17:39	4	0.0000	2.1112017686144187	1.8548760414123535	0.9117	0.9117	0.9117	0.9117
+9	13:20:43	4	0.0000	2.0759186003590093	1.830302357673645	0.9161	0.9161	0.9161	0.9161
+10	13:23:46	4	0.0000	2.0624352113596314	1.8217284679412842	0.9195	0.9195	0.9195	0.9195

test.tsv ADDED Viewed

The diff for this file is too large to render. See raw diff

training.log ADDED Viewed

	@@ -0,0 +1,538 @@

+2022-02-04 12:53:17,467 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:53:17,468 Model: "SequenceTagger(
+  (embeddings): TransformerWordEmbeddings(
+    (model): CamembertModel(
+      (embeddings): RobertaEmbeddings(
+        (word_embeddings): Embedding(32005, 768, padding_idx=1)
+        (position_embeddings): Embedding(514, 768, padding_idx=1)
+        (token_type_embeddings): Embedding(1, 768)
+        (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+        (dropout): Dropout(p=0.1, inplace=False)
+      )
+      (encoder): RobertaEncoder(
+        (layer): ModuleList(
+          (0): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (1): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (2): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (3): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (4): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (5): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (6): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (7): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (8): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (9): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (10): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+          (11): RobertaLayer(
+            (attention): RobertaAttention(
+              (self): RobertaSelfAttention(
+                (query): Linear(in_features=768, out_features=768, bias=True)
+                (key): Linear(in_features=768, out_features=768, bias=True)
+                (value): Linear(in_features=768, out_features=768, bias=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+              (output): RobertaSelfOutput(
+                (dense): Linear(in_features=768, out_features=768, bias=True)
+                (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+                (dropout): Dropout(p=0.1, inplace=False)
+              )
+            )
+            (intermediate): RobertaIntermediate(
+              (dense): Linear(in_features=768, out_features=3072, bias=True)
+            )
+            (output): RobertaOutput(
+              (dense): Linear(in_features=3072, out_features=768, bias=True)
+              (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+              (dropout): Dropout(p=0.1, inplace=False)
+            )
+          )
+        )
+      )
+      (pooler): RobertaPooler(
+        (dense): Linear(in_features=768, out_features=768, bias=True)
+        (activation): Tanh()
+      )
+    )
+  )
+  (word_dropout): WordDropout(p=0.05)
+  (locked_dropout): LockedDropout(p=0.5)
+  (linear): Linear(in_features=768, out_features=51, bias=True)
+  (beta): 1.0
+  (weights): None
+  (weight_tensor) None
+)"
+2022-02-04 12:53:17,506 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:53:17,506 Corpus: "Corpus: 5642 train + 195 dev + 649 test sentences"
+2022-02-04 12:53:17,506 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:53:17,506 Parameters:
+2022-02-04 12:53:17,506  - learning_rate: "5e-06"
+2022-02-04 12:53:17,506  - mini_batch_size: "32"
+2022-02-04 12:53:17,506  - patience: "3"
+2022-02-04 12:53:17,506  - anneal_factor: "0.5"
+2022-02-04 12:53:17,506  - max_epochs: "10"
+2022-02-04 12:53:17,506  - shuffle: "True"
+2022-02-04 12:53:17,506  - train_with_dev: "False"
+2022-02-04 12:53:17,506  - batch_growth_annealing: "False"
+2022-02-04 12:53:17,506 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:53:17,506 Model training base path: "resources/taggers/pos-camembert"
+2022-02-04 12:53:17,506 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:53:17,511 Device: cuda:0
+2022-02-04 12:53:17,511 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:53:17,511 Embeddings storage mode: none
+2022-02-04 12:53:17,513 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:53:38,315 epoch 1 - iter 17/177 - loss 3.96872255 - samples/sec: 26.15 - lr: 0.000000
+2022-02-04 12:53:54,561 epoch 1 - iter 34/177 - loss 3.96629180 - samples/sec: 33.49 - lr: 0.000001
+2022-02-04 12:54:11,140 epoch 1 - iter 51/177 - loss 3.95985736 - samples/sec: 32.82 - lr: 0.000001
+2022-02-04 12:54:27,471 epoch 1 - iter 68/177 - loss 3.95248851 - samples/sec: 33.31 - lr: 0.000002
+2022-02-04 12:54:44,574 epoch 1 - iter 85/177 - loss 3.94223845 - samples/sec: 31.81 - lr: 0.000002
+2022-02-04 12:54:59,811 epoch 1 - iter 102/177 - loss 3.93034373 - samples/sec: 35.71 - lr: 0.000003
+2022-02-04 12:55:17,140 epoch 1 - iter 119/177 - loss 3.91667895 - samples/sec: 31.39 - lr: 0.000003
+2022-02-04 12:55:33,245 epoch 1 - iter 136/177 - loss 3.90088222 - samples/sec: 33.78 - lr: 0.000004
+2022-02-04 12:55:48,743 epoch 1 - iter 153/177 - loss 3.87766994 - samples/sec: 35.11 - lr: 0.000004
+2022-02-04 12:56:06,269 epoch 1 - iter 170/177 - loss 3.84880099 - samples/sec: 31.04 - lr: 0.000005
+2022-02-04 12:56:12,033 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:56:12,033 EPOCH 1 done: loss 3.8419 - lr 0.0000050
+2022-02-04 12:56:18,260 DEV : loss 3.509683847427368 - f1-score (micro avg)  0.3053
+2022-02-04 12:56:18,262 BAD EPOCHS (no improvement): 4
+2022-02-04 12:56:18,285 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:56:35,575 epoch 2 - iter 17/177 - loss 3.54034313 - samples/sec: 31.47 - lr: 0.000005
+2022-02-04 12:56:52,475 epoch 2 - iter 34/177 - loss 3.50300407 - samples/sec: 32.19 - lr: 0.000005
+2022-02-04 12:57:09,058 epoch 2 - iter 51/177 - loss 3.46864739 - samples/sec: 32.81 - lr: 0.000005
+2022-02-04 12:57:25,624 epoch 2 - iter 68/177 - loss 3.43125430 - samples/sec: 32.84 - lr: 0.000005
+2022-02-04 12:57:42,941 epoch 2 - iter 85/177 - loss 3.39270879 - samples/sec: 31.42 - lr: 0.000005
+2022-02-04 12:57:59,153 epoch 2 - iter 102/177 - loss 3.35791389 - samples/sec: 33.56 - lr: 0.000005
+2022-02-04 12:58:16,864 epoch 2 - iter 119/177 - loss 3.32573531 - samples/sec: 30.72 - lr: 0.000005
+2022-02-04 12:58:34,354 epoch 2 - iter 136/177 - loss 3.29370429 - samples/sec: 31.11 - lr: 0.000005
+2022-02-04 12:58:51,116 epoch 2 - iter 153/177 - loss 3.26367901 - samples/sec: 32.46 - lr: 0.000005
+2022-02-04 12:59:08,117 epoch 2 - iter 170/177 - loss 3.23382669 - samples/sec: 32.00 - lr: 0.000004
+2022-02-04 12:59:15,072 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:59:15,074 EPOCH 2 done: loss 3.2228 - lr 0.0000044
+2022-02-04 12:59:20,452 DEV : loss 2.775869846343994 - f1-score (micro avg)  0.6141
+2022-02-04 12:59:20,455 BAD EPOCHS (no improvement): 4
+2022-02-04 12:59:20,455 ----------------------------------------------------------------------------------------------------
+2022-02-04 12:59:38,069 epoch 3 - iter 17/177 - loss 2.92343717 - samples/sec: 30.89 - lr: 0.000004
+2022-02-04 12:59:54,400 epoch 3 - iter 34/177 - loss 2.90201388 - samples/sec: 33.32 - lr: 0.000004
+2022-02-04 13:00:12,150 epoch 3 - iter 51/177 - loss 2.88495451 - samples/sec: 30.65 - lr: 0.000004
+2022-02-04 13:00:28,960 epoch 3 - iter 68/177 - loss 2.86475060 - samples/sec: 32.37 - lr: 0.000004
+2022-02-04 13:00:47,016 epoch 3 - iter 85/177 - loss 2.84779479 - samples/sec: 30.13 - lr: 0.000004
+2022-02-04 13:01:03,811 epoch 3 - iter 102/177 - loss 2.83018073 - samples/sec: 32.40 - lr: 0.000004
+2022-02-04 13:01:19,598 epoch 3 - iter 119/177 - loss 2.81577196 - samples/sec: 34.47 - lr: 0.000004
+2022-02-04 13:01:36,746 epoch 3 - iter 136/177 - loss 2.80310518 - samples/sec: 31.73 - lr: 0.000004
+2022-02-04 13:01:53,532 epoch 3 - iter 153/177 - loss 2.79075673 - samples/sec: 32.41 - lr: 0.000004
+2022-02-04 13:02:11,809 epoch 3 - iter 170/177 - loss 2.77624103 - samples/sec: 29.77 - lr: 0.000004
+2022-02-04 13:02:17,990 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:02:17,991 EPOCH 3 done: loss 2.7701 - lr 0.0000039
+2022-02-04 13:02:23,777 DEV : loss 2.410931348800659 - f1-score (micro avg)  0.819
+2022-02-04 13:02:23,780 BAD EPOCHS (no improvement): 4
+2022-02-04 13:02:23,781 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:02:41,231 epoch 4 - iter 17/177 - loss 2.60188784 - samples/sec: 31.18 - lr: 0.000004
+2022-02-04 13:02:58,635 epoch 4 - iter 34/177 - loss 2.59095213 - samples/sec: 31.26 - lr: 0.000004
+2022-02-04 13:03:15,040 epoch 4 - iter 51/177 - loss 2.58502577 - samples/sec: 33.17 - lr: 0.000004
+2022-02-04 13:03:32,700 epoch 4 - iter 68/177 - loss 2.57149732 - samples/sec: 30.81 - lr: 0.000004
+2022-02-04 13:03:49,889 epoch 4 - iter 85/177 - loss 2.55924475 - samples/sec: 31.65 - lr: 0.000004
+2022-02-04 13:04:07,257 epoch 4 - iter 102/177 - loss 2.54972860 - samples/sec: 31.33 - lr: 0.000004
+2022-02-04 13:04:24,141 epoch 4 - iter 119/177 - loss 2.54070048 - samples/sec: 32.23 - lr: 0.000004
+2022-02-04 13:04:40,320 epoch 4 - iter 136/177 - loss 2.53210863 - samples/sec: 33.69 - lr: 0.000003
+2022-02-04 13:04:57,281 epoch 4 - iter 153/177 - loss 2.52441237 - samples/sec: 32.08 - lr: 0.000003
+2022-02-04 13:05:15,246 epoch 4 - iter 170/177 - loss 2.51520228 - samples/sec: 30.29 - lr: 0.000003
+2022-02-04 13:05:21,452 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:05:21,458 EPOCH 4 done: loss 2.5123 - lr 0.0000033
+2022-02-04 13:05:27,295 DEV : loss 2.1908302307128906 - f1-score (micro avg)  0.8605
+2022-02-04 13:05:27,310 BAD EPOCHS (no improvement): 4
+2022-02-04 13:05:27,310 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:05:44,024 epoch 5 - iter 17/177 - loss 2.39887737 - samples/sec: 32.55 - lr: 0.000003
+2022-02-04 13:06:01,687 epoch 5 - iter 34/177 - loss 2.39948538 - samples/sec: 30.80 - lr: 0.000003
+2022-02-04 13:06:19,664 epoch 5 - iter 51/177 - loss 2.40078878 - samples/sec: 30.29 - lr: 0.000003
+2022-02-04 13:06:36,241 epoch 5 - iter 68/177 - loss 2.39524823 - samples/sec: 32.93 - lr: 0.000003
+2022-02-04 13:06:52,683 epoch 5 - iter 85/177 - loss 2.38764769 - samples/sec: 33.17 - lr: 0.000003
+2022-02-04 13:07:09,718 epoch 5 - iter 102/177 - loss 2.38104055 - samples/sec: 31.94 - lr: 0.000003
+2022-02-04 13:07:26,578 epoch 5 - iter 119/177 - loss 2.37384530 - samples/sec: 32.29 - lr: 0.000003
+2022-02-04 13:07:42,599 epoch 5 - iter 136/177 - loss 2.36823710 - samples/sec: 33.96 - lr: 0.000003
+2022-02-04 13:08:00,031 epoch 5 - iter 153/177 - loss 2.36030726 - samples/sec: 31.25 - lr: 0.000003
+2022-02-04 13:08:17,779 epoch 5 - iter 170/177 - loss 2.35368343 - samples/sec: 30.72 - lr: 0.000003
+2022-02-04 13:08:24,110 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:08:24,111 EPOCH 5 done: loss 2.3509 - lr 0.0000028
+2022-02-04 13:08:30,298 DEV : loss 2.0516607761383057 - f1-score (micro avg)  0.8737
+2022-02-04 13:08:30,301 BAD EPOCHS (no improvement): 4
+2022-02-04 13:08:30,301 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:08:46,667 epoch 6 - iter 17/177 - loss 2.27743160 - samples/sec: 33.25 - lr: 0.000003
+2022-02-04 13:09:04,814 epoch 6 - iter 34/177 - loss 2.27286852 - samples/sec: 29.99 - lr: 0.000003
+2022-02-04 13:09:21,239 epoch 6 - iter 51/177 - loss 2.27175336 - samples/sec: 33.23 - lr: 0.000003
+2022-02-04 13:09:38,163 epoch 6 - iter 68/177 - loss 2.26491131 - samples/sec: 32.15 - lr: 0.000003
+2022-02-04 13:09:54,338 epoch 6 - iter 85/177 - loss 2.25999023 - samples/sec: 33.65 - lr: 0.000003
+2022-02-04 13:10:12,270 epoch 6 - iter 102/177 - loss 2.25580949 - samples/sec: 30.38 - lr: 0.000002
+2022-02-04 13:10:29,245 epoch 6 - iter 119/177 - loss 2.25275307 - samples/sec: 32.13 - lr: 0.000002
+2022-02-04 13:10:46,065 epoch 6 - iter 136/177 - loss 2.24661845 - samples/sec: 32.40 - lr: 0.000002
+2022-02-04 13:11:03,357 epoch 6 - iter 153/177 - loss 2.24241040 - samples/sec: 31.47 - lr: 0.000002
+2022-02-04 13:11:22,211 epoch 6 - iter 170/177 - loss 2.23773462 - samples/sec: 28.87 - lr: 0.000002
+2022-02-04 13:11:28,309 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:11:28,321 EPOCH 6 done: loss 2.2366 - lr 0.0000022
+2022-02-04 13:11:34,136 DEV : loss 1.9612011909484863 - f1-score (micro avg)  0.884
+2022-02-04 13:11:34,150 BAD EPOCHS (no improvement): 4
+2022-02-04 13:11:34,151 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:11:50,446 epoch 7 - iter 17/177 - loss 2.19566504 - samples/sec: 33.39 - lr: 0.000002
+2022-02-04 13:12:06,851 epoch 7 - iter 34/177 - loss 2.19802945 - samples/sec: 33.21 - lr: 0.000002
+2022-02-04 13:12:23,401 epoch 7 - iter 51/177 - loss 2.19405535 - samples/sec: 32.88 - lr: 0.000002
+2022-02-04 13:12:41,303 epoch 7 - iter 68/177 - loss 2.19162087 - samples/sec: 30.39 - lr: 0.000002
+2022-02-04 13:12:58,144 epoch 7 - iter 85/177 - loss 2.18471516 - samples/sec: 32.35 - lr: 0.000002
+2022-02-04 13:13:16,467 epoch 7 - iter 102/177 - loss 2.18080579 - samples/sec: 29.75 - lr: 0.000002
+2022-02-04 13:13:34,031 epoch 7 - iter 119/177 - loss 2.17936921 - samples/sec: 31.00 - lr: 0.000002
+2022-02-04 13:13:51,077 epoch 7 - iter 136/177 - loss 2.17514038 - samples/sec: 32.02 - lr: 0.000002
+2022-02-04 13:14:07,857 epoch 7 - iter 153/177 - loss 2.17141812 - samples/sec: 32.48 - lr: 0.000002
+2022-02-04 13:14:25,422 epoch 7 - iter 170/177 - loss 2.16711471 - samples/sec: 30.99 - lr: 0.000002
+2022-02-04 13:14:31,227 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:14:31,228 EPOCH 7 done: loss 2.1662 - lr 0.0000017
+2022-02-04 13:14:37,035 DEV : loss 1.8981177806854248 - f1-score (micro avg)  0.9008
+2022-02-04 13:14:37,049 BAD EPOCHS (no improvement): 4
+2022-02-04 13:14:37,050 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:14:54,867 epoch 8 - iter 17/177 - loss 2.13839948 - samples/sec: 30.54 - lr: 0.000002
+2022-02-04 13:15:11,283 epoch 8 - iter 34/177 - loss 2.13301605 - samples/sec: 33.16 - lr: 0.000002
+2022-02-04 13:15:28,761 epoch 8 - iter 51/177 - loss 2.12335776 - samples/sec: 31.15 - lr: 0.000002
+2022-02-04 13:15:44,480 epoch 8 - iter 68/177 - loss 2.12525500 - samples/sec: 34.61 - lr: 0.000001
+2022-02-04 13:16:01,084 epoch 8 - iter 85/177 - loss 2.12100353 - samples/sec: 32.77 - lr: 0.000001
+2022-02-04 13:16:17,945 epoch 8 - iter 102/177 - loss 2.12081652 - samples/sec: 32.27 - lr: 0.000001
+2022-02-04 13:16:34,469 epoch 8 - iter 119/177 - loss 2.11872473 - samples/sec: 32.93 - lr: 0.000001
+2022-02-04 13:16:50,308 epoch 8 - iter 136/177 - loss 2.11635062 - samples/sec: 34.35 - lr: 0.000001
+2022-02-04 13:17:07,313 epoch 8 - iter 153/177 - loss 2.11371370 - samples/sec: 32.00 - lr: 0.000001
+2022-02-04 13:17:25,553 epoch 8 - iter 170/177 - loss 2.11100152 - samples/sec: 29.83 - lr: 0.000001
+2022-02-04 13:17:33,472 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:17:33,473 EPOCH 8 done: loss 2.1112 - lr 0.0000011
+2022-02-04 13:17:39,308 DEV : loss 1.8548760414123535 - f1-score (micro avg)  0.9117
+2022-02-04 13:17:39,311 BAD EPOCHS (no improvement): 4
+2022-02-04 13:17:39,311 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:17:56,622 epoch 9 - iter 17/177 - loss 2.06819398 - samples/sec: 31.43 - lr: 0.000001
+2022-02-04 13:18:13,360 epoch 9 - iter 34/177 - loss 2.07590305 - samples/sec: 32.51 - lr: 0.000001
+2022-02-04 13:18:31,366 epoch 9 - iter 51/177 - loss 2.07666788 - samples/sec: 30.22 - lr: 0.000001
+2022-02-04 13:18:49,983 epoch 9 - iter 68/177 - loss 2.07961625 - samples/sec: 29.23 - lr: 0.000001
+2022-02-04 13:19:06,239 epoch 9 - iter 85/177 - loss 2.08063462 - samples/sec: 33.47 - lr: 0.000001
+2022-02-04 13:19:23,068 epoch 9 - iter 102/177 - loss 2.08002246 - samples/sec: 32.33 - lr: 0.000001
+2022-02-04 13:19:40,188 epoch 9 - iter 119/177 - loss 2.07956869 - samples/sec: 31.78 - lr: 0.000001
+2022-02-04 13:19:57,482 epoch 9 - iter 136/177 - loss 2.07835867 - samples/sec: 31.47 - lr: 0.000001
+2022-02-04 13:20:14,155 epoch 9 - iter 153/177 - loss 2.07750905 - samples/sec: 32.64 - lr: 0.000001
+2022-02-04 13:20:31,533 epoch 9 - iter 170/177 - loss 2.07545212 - samples/sec: 31.31 - lr: 0.000001
+2022-02-04 13:20:37,466 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:20:37,468 EPOCH 9 done: loss 2.0759 - lr 0.0000006
+2022-02-04 13:20:43,299 DEV : loss 1.830302357673645 - f1-score (micro avg)  0.9161
+2022-02-04 13:20:43,314 BAD EPOCHS (no improvement): 4
+2022-02-04 13:20:43,314 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:21:00,247 epoch 10 - iter 17/177 - loss 2.06625894 - samples/sec: 32.13 - lr: 0.000001
+2022-02-04 13:21:16,847 epoch 10 - iter 34/177 - loss 2.06850742 - samples/sec: 32.78 - lr: 0.000000
+2022-02-04 13:21:34,047 epoch 10 - iter 51/177 - loss 2.06653386 - samples/sec: 31.68 - lr: 0.000000
+2022-02-04 13:21:50,597 epoch 10 - iter 68/177 - loss 2.06650174 - samples/sec: 32.88 - lr: 0.000000
+2022-02-04 13:22:07,286 epoch 10 - iter 85/177 - loss 2.06409229 - samples/sec: 32.61 - lr: 0.000000
+2022-02-04 13:22:25,744 epoch 10 - iter 102/177 - loss 2.06162033 - samples/sec: 29.48 - lr: 0.000000
+2022-02-04 13:22:43,419 epoch 10 - iter 119/177 - loss 2.06248176 - samples/sec: 30.78 - lr: 0.000000
+2022-02-04 13:22:59,502 epoch 10 - iter 136/177 - loss 2.06392395 - samples/sec: 33.83 - lr: 0.000000
+2022-02-04 13:23:16,396 epoch 10 - iter 153/177 - loss 2.06446242 - samples/sec: 32.21 - lr: 0.000000
+2022-02-04 13:23:33,136 epoch 10 - iter 170/177 - loss 2.06210437 - samples/sec: 32.50 - lr: 0.000000
+2022-02-04 13:23:40,551 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:23:40,552 EPOCH 10 done: loss 2.0624 - lr 0.0000000
+2022-02-04 13:23:46,365 DEV : loss 1.8217284679412842 - f1-score (micro avg)  0.9195
+2022-02-04 13:23:46,367 BAD EPOCHS (no improvement): 4
+2022-02-04 13:23:47,542 ----------------------------------------------------------------------------------------------------
+2022-02-04 13:23:47,544 Testing using last state of model ...
+2022-02-04 13:24:07,461 0.9181	0.9181	0.9181	0.9181
+2022-02-04 13:24:07,462
+Results:
+- F-score (micro) 0.9181
+- F-score (macro) 0.439
+- Accuracy 0.9181
+By class:
+              precision    recall  f1-score   support
+      NOMcom     0.9530    0.9808    0.9667      2130
+      VERcjg     0.9683    0.9935    0.9807      1535
+         PRE     0.8411    0.9940    0.9112      1331
+      PROper     0.9253    0.9963    0.9595      1368
+      PONfbl     0.9824    0.9993    0.9908      1341
+      ADVgen     0.8179    0.8276    0.8227       841
+      PONfrt     0.9721    1.0000    0.9859       662
+      DETdef     0.9393    0.9967    0.9672       606
+      ADJqua     0.8289    0.9400    0.8810       500
+      VERinf     0.9706    0.9960    0.9831       497
+      DETpos     0.9791    0.9979    0.9884       469
+      CONcoo     0.9645    0.9935    0.9788       465
+      CONsub     0.7437    0.9846    0.8473       389
+      VERppe     0.9042    0.9408    0.9221       321
+      DETndf     0.7270    0.9959    0.8405       246
+      NOMpro     0.9485    0.8340    0.8876       265
+      PROrel     0.9398    0.7519    0.8354       270
+      ADVneg     0.9577    0.7528    0.8430       271
+      DETdem     0.9934    0.9742    0.9837       155
+      PROind     1.0000    0.4894    0.6571       188
+      PROadv     0.9000    0.8108    0.8531       111
+      PROdem     1.0000    0.6387    0.7795       119
+      DETind     0.8000    0.7347    0.7660        98
+  PRE.DETdef     0.0000    0.0000    0.0000       183
+      VERppa     0.0000    0.0000    0.0000        67
+      PROimp     0.0000    0.0000    0.0000        54
+         INJ     0.0000    0.0000    0.0000        35
+      DETcar     0.0000    0.0000    0.0000        31
+      ADJind     0.0000    0.0000    0.0000        30
+      PROint     0.0000    0.0000    0.0000        22
+      ADJcar     0.0000    0.0000    0.0000        21
+      PROcar     0.0000    0.0000    0.0000        18
+      DETrel     0.0000    0.0000    0.0000        16
+      ADJord     0.0000    0.0000    0.0000        16
+      PONpga     0.0000    0.0000    0.0000        16
+      PROpos     0.0000    0.0000    0.0000        14
+      PONpdr     0.0000    0.0000    0.0000        13
+      DETint     0.0000    0.0000    0.0000        10
+      PONpxx     0.0000    0.0000    0.0000         6
+      ADVint     0.0000    0.0000    0.0000         5
+  PRE.PROrel     0.0000    0.0000    0.0000         2
+       latin     0.0000    0.0000    0.0000         2
+      PROord     0.0000    0.0000    0.0000         1
+  PRE.PROdem     0.0000    0.0000    0.0000         1
+  PRE.NOMcom     0.0000    0.0000    0.0000         1
+         ETR     0.0000    0.0000    0.0000         1
+      ADVsub     0.0000    0.0000    0.0000         1
+   micro avg     0.9181    0.9181    0.9181     14744
+   macro avg     0.4480    0.4388    0.4390     14744
+weighted avg     0.8876    0.9181    0.8991     14744
+ samples avg     0.9181    0.9181    0.9181     14744
+2022-02-04 13:24:07,477 ----------------------------------------------------------------------------------------------------

weights.txt ADDED Viewed

File without changes