gazquez committed on
Commit
c588f54
1 Parent(s): 4f09e4c

Added model without flair embeddings

Browse files
Files changed (3) hide show
  1. loss.tsv +2 -2
  2. pytorch_model.bin +2 -2
  3. training.log +313 -331
loss.tsv CHANGED
@@ -1,3 +1,3 @@
1
  EPOCH TIMESTAMP BAD_EPOCHS LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
2
- 1 00:43:17 0 0.0100 0.189008099097259 0.06317088007926941 0.9458 0.9717 0.9585 0.9206
3
- 2 01:01:40 0 0.0100 0.09155321446036903 0.04371843859553337 0.9713 0.9785 0.9749 0.9511
 
1
  EPOCH TIMESTAMP BAD_EPOCHS LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
2
+ 1 14:24:53 0 0.0100 0.291245240352544 0.06397613137960434 0.9724 0.9736 0.973 0.9477
3
+ 2 14:42:51 0 0.0100 0.13731835639464673 0.05747831612825394 0.9826 0.9863 0.9844 0.9696
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba47fe584e142b59f730a6261f9803f7ece0667ea1a48948c84803fb33ffeb9
3
- size 754011805
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c623f10dba949ae162389713d32ce968220b060cfad3fdb180300495a7f35cc
3
+ size 714487533
training.log CHANGED
@@ -1,408 +1,390 @@
1
- 2022-10-01 00:23:25,105 ----------------------------------------------------------------------------------------------------
2
- 2022-10-01 00:23:25,107 Model: "SequenceTagger(
3
- (embeddings): StackedEmbeddings(
4
- (list_embedding_0): TransformerWordEmbeddings(
5
- (model): BertModel(
6
- (embeddings): BertEmbeddings(
7
- (word_embeddings): Embedding(119547, 768, padding_idx=0)
8
- (position_embeddings): Embedding(512, 768)
9
- (token_type_embeddings): Embedding(2, 768)
10
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
11
- (dropout): Dropout(p=0.1, inplace=False)
12
- )
13
- (encoder): BertEncoder(
14
- (layer): ModuleList(
15
- (0): BertLayer(
16
- (attention): BertAttention(
17
- (self): BertSelfAttention(
18
- (query): Linear(in_features=768, out_features=768, bias=True)
19
- (key): Linear(in_features=768, out_features=768, bias=True)
20
- (value): Linear(in_features=768, out_features=768, bias=True)
21
- (dropout): Dropout(p=0.1, inplace=False)
22
- )
23
- (output): BertSelfOutput(
24
- (dense): Linear(in_features=768, out_features=768, bias=True)
25
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
26
- (dropout): Dropout(p=0.1, inplace=False)
27
- )
28
- )
29
- (intermediate): BertIntermediate(
30
- (dense): Linear(in_features=768, out_features=3072, bias=True)
31
- (intermediate_act_fn): GELUActivation()
32
  )
33
- (output): BertOutput(
34
- (dense): Linear(in_features=3072, out_features=768, bias=True)
35
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
36
  (dropout): Dropout(p=0.1, inplace=False)
37
  )
38
  )
39
- (1): BertLayer(
40
- (attention): BertAttention(
41
- (self): BertSelfAttention(
42
- (query): Linear(in_features=768, out_features=768, bias=True)
43
- (key): Linear(in_features=768, out_features=768, bias=True)
44
- (value): Linear(in_features=768, out_features=768, bias=True)
45
- (dropout): Dropout(p=0.1, inplace=False)
46
- )
47
- (output): BertSelfOutput(
48
- (dense): Linear(in_features=768, out_features=768, bias=True)
49
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
50
- (dropout): Dropout(p=0.1, inplace=False)
51
- )
52
- )
53
- (intermediate): BertIntermediate(
54
- (dense): Linear(in_features=768, out_features=3072, bias=True)
55
- (intermediate_act_fn): GELUActivation()
56
  )
57
- (output): BertOutput(
58
- (dense): Linear(in_features=3072, out_features=768, bias=True)
59
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
60
  (dropout): Dropout(p=0.1, inplace=False)
61
  )
62
  )
63
- (2): BertLayer(
64
- (attention): BertAttention(
65
- (self): BertSelfAttention(
66
- (query): Linear(in_features=768, out_features=768, bias=True)
67
- (key): Linear(in_features=768, out_features=768, bias=True)
68
- (value): Linear(in_features=768, out_features=768, bias=True)
69
- (dropout): Dropout(p=0.1, inplace=False)
70
- )
71
- (output): BertSelfOutput(
72
- (dense): Linear(in_features=768, out_features=768, bias=True)
73
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
74
- (dropout): Dropout(p=0.1, inplace=False)
75
- )
76
- )
77
- (intermediate): BertIntermediate(
78
- (dense): Linear(in_features=768, out_features=3072, bias=True)
79
- (intermediate_act_fn): GELUActivation()
80
  )
81
- (output): BertOutput(
82
- (dense): Linear(in_features=3072, out_features=768, bias=True)
83
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
84
  (dropout): Dropout(p=0.1, inplace=False)
85
  )
86
  )
87
- (3): BertLayer(
88
- (attention): BertAttention(
89
- (self): BertSelfAttention(
90
- (query): Linear(in_features=768, out_features=768, bias=True)
91
- (key): Linear(in_features=768, out_features=768, bias=True)
92
- (value): Linear(in_features=768, out_features=768, bias=True)
93
- (dropout): Dropout(p=0.1, inplace=False)
94
- )
95
- (output): BertSelfOutput(
96
- (dense): Linear(in_features=768, out_features=768, bias=True)
97
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
98
- (dropout): Dropout(p=0.1, inplace=False)
99
- )
100
- )
101
- (intermediate): BertIntermediate(
102
- (dense): Linear(in_features=768, out_features=3072, bias=True)
103
- (intermediate_act_fn): GELUActivation()
104
  )
105
- (output): BertOutput(
106
- (dense): Linear(in_features=3072, out_features=768, bias=True)
107
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
108
  (dropout): Dropout(p=0.1, inplace=False)
109
  )
110
  )
111
- (4): BertLayer(
112
- (attention): BertAttention(
113
- (self): BertSelfAttention(
114
- (query): Linear(in_features=768, out_features=768, bias=True)
115
- (key): Linear(in_features=768, out_features=768, bias=True)
116
- (value): Linear(in_features=768, out_features=768, bias=True)
117
- (dropout): Dropout(p=0.1, inplace=False)
118
- )
119
- (output): BertSelfOutput(
120
- (dense): Linear(in_features=768, out_features=768, bias=True)
121
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
122
- (dropout): Dropout(p=0.1, inplace=False)
123
- )
124
- )
125
- (intermediate): BertIntermediate(
126
- (dense): Linear(in_features=768, out_features=3072, bias=True)
127
- (intermediate_act_fn): GELUActivation()
128
  )
129
- (output): BertOutput(
130
- (dense): Linear(in_features=3072, out_features=768, bias=True)
131
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
132
  (dropout): Dropout(p=0.1, inplace=False)
133
  )
134
  )
135
- (5): BertLayer(
136
- (attention): BertAttention(
137
- (self): BertSelfAttention(
138
- (query): Linear(in_features=768, out_features=768, bias=True)
139
- (key): Linear(in_features=768, out_features=768, bias=True)
140
- (value): Linear(in_features=768, out_features=768, bias=True)
141
- (dropout): Dropout(p=0.1, inplace=False)
142
- )
143
- (output): BertSelfOutput(
144
- (dense): Linear(in_features=768, out_features=768, bias=True)
145
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
146
- (dropout): Dropout(p=0.1, inplace=False)
147
- )
148
- )
149
- (intermediate): BertIntermediate(
150
- (dense): Linear(in_features=768, out_features=3072, bias=True)
151
- (intermediate_act_fn): GELUActivation()
152
  )
153
- (output): BertOutput(
154
- (dense): Linear(in_features=3072, out_features=768, bias=True)
155
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
156
  (dropout): Dropout(p=0.1, inplace=False)
157
  )
158
  )
159
- (6): BertLayer(
160
- (attention): BertAttention(
161
- (self): BertSelfAttention(
162
- (query): Linear(in_features=768, out_features=768, bias=True)
163
- (key): Linear(in_features=768, out_features=768, bias=True)
164
- (value): Linear(in_features=768, out_features=768, bias=True)
165
- (dropout): Dropout(p=0.1, inplace=False)
166
- )
167
- (output): BertSelfOutput(
168
- (dense): Linear(in_features=768, out_features=768, bias=True)
169
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
170
- (dropout): Dropout(p=0.1, inplace=False)
171
- )
172
- )
173
- (intermediate): BertIntermediate(
174
- (dense): Linear(in_features=768, out_features=3072, bias=True)
175
- (intermediate_act_fn): GELUActivation()
176
  )
177
- (output): BertOutput(
178
- (dense): Linear(in_features=3072, out_features=768, bias=True)
179
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
180
  (dropout): Dropout(p=0.1, inplace=False)
181
  )
182
  )
183
- (7): BertLayer(
184
- (attention): BertAttention(
185
- (self): BertSelfAttention(
186
- (query): Linear(in_features=768, out_features=768, bias=True)
187
- (key): Linear(in_features=768, out_features=768, bias=True)
188
- (value): Linear(in_features=768, out_features=768, bias=True)
189
- (dropout): Dropout(p=0.1, inplace=False)
190
- )
191
- (output): BertSelfOutput(
192
- (dense): Linear(in_features=768, out_features=768, bias=True)
193
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
194
- (dropout): Dropout(p=0.1, inplace=False)
195
- )
196
- )
197
- (intermediate): BertIntermediate(
198
- (dense): Linear(in_features=768, out_features=3072, bias=True)
199
- (intermediate_act_fn): GELUActivation()
200
  )
201
- (output): BertOutput(
202
- (dense): Linear(in_features=3072, out_features=768, bias=True)
203
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
204
  (dropout): Dropout(p=0.1, inplace=False)
205
  )
206
  )
207
- (8): BertLayer(
208
- (attention): BertAttention(
209
- (self): BertSelfAttention(
210
- (query): Linear(in_features=768, out_features=768, bias=True)
211
- (key): Linear(in_features=768, out_features=768, bias=True)
212
- (value): Linear(in_features=768, out_features=768, bias=True)
213
- (dropout): Dropout(p=0.1, inplace=False)
214
- )
215
- (output): BertSelfOutput(
216
- (dense): Linear(in_features=768, out_features=768, bias=True)
217
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
218
- (dropout): Dropout(p=0.1, inplace=False)
219
- )
220
- )
221
- (intermediate): BertIntermediate(
222
- (dense): Linear(in_features=768, out_features=3072, bias=True)
223
- (intermediate_act_fn): GELUActivation()
224
  )
225
- (output): BertOutput(
226
- (dense): Linear(in_features=3072, out_features=768, bias=True)
227
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
228
  (dropout): Dropout(p=0.1, inplace=False)
229
  )
230
  )
231
- (9): BertLayer(
232
- (attention): BertAttention(
233
- (self): BertSelfAttention(
234
- (query): Linear(in_features=768, out_features=768, bias=True)
235
- (key): Linear(in_features=768, out_features=768, bias=True)
236
- (value): Linear(in_features=768, out_features=768, bias=True)
237
- (dropout): Dropout(p=0.1, inplace=False)
238
- )
239
- (output): BertSelfOutput(
240
- (dense): Linear(in_features=768, out_features=768, bias=True)
241
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
242
- (dropout): Dropout(p=0.1, inplace=False)
243
- )
244
- )
245
- (intermediate): BertIntermediate(
246
- (dense): Linear(in_features=768, out_features=3072, bias=True)
247
- (intermediate_act_fn): GELUActivation()
248
  )
249
- (output): BertOutput(
250
- (dense): Linear(in_features=3072, out_features=768, bias=True)
251
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
252
  (dropout): Dropout(p=0.1, inplace=False)
253
  )
254
  )
255
- (10): BertLayer(
256
- (attention): BertAttention(
257
- (self): BertSelfAttention(
258
- (query): Linear(in_features=768, out_features=768, bias=True)
259
- (key): Linear(in_features=768, out_features=768, bias=True)
260
- (value): Linear(in_features=768, out_features=768, bias=True)
261
- (dropout): Dropout(p=0.1, inplace=False)
262
- )
263
- (output): BertSelfOutput(
264
- (dense): Linear(in_features=768, out_features=768, bias=True)
265
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
266
- (dropout): Dropout(p=0.1, inplace=False)
267
- )
268
- )
269
- (intermediate): BertIntermediate(
270
- (dense): Linear(in_features=768, out_features=3072, bias=True)
271
- (intermediate_act_fn): GELUActivation()
272
  )
273
- (output): BertOutput(
274
- (dense): Linear(in_features=3072, out_features=768, bias=True)
275
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
276
  (dropout): Dropout(p=0.1, inplace=False)
277
  )
278
  )
279
- (11): BertLayer(
280
- (attention): BertAttention(
281
- (self): BertSelfAttention(
282
- (query): Linear(in_features=768, out_features=768, bias=True)
283
- (key): Linear(in_features=768, out_features=768, bias=True)
284
- (value): Linear(in_features=768, out_features=768, bias=True)
285
- (dropout): Dropout(p=0.1, inplace=False)
286
- )
287
- (output): BertSelfOutput(
288
- (dense): Linear(in_features=768, out_features=768, bias=True)
289
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
290
- (dropout): Dropout(p=0.1, inplace=False)
291
- )
292
- )
293
- (intermediate): BertIntermediate(
294
- (dense): Linear(in_features=768, out_features=3072, bias=True)
295
- (intermediate_act_fn): GELUActivation()
296
  )
297
- (output): BertOutput(
298
- (dense): Linear(in_features=3072, out_features=768, bias=True)
299
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
300
  (dropout): Dropout(p=0.1, inplace=False)
301
  )
302
  )
 
 
 
 
 
 
 
 
 
303
  )
304
  )
305
- (pooler): BertPooler(
306
- (dense): Linear(in_features=768, out_features=768, bias=True)
307
- (activation): Tanh()
308
- )
309
- )
310
- )
311
- (list_embedding_1): FlairEmbeddings(
312
- (lm): LanguageModel(
313
- (drop): Dropout(p=0.5, inplace=False)
314
- (encoder): Embedding(275, 100)
315
- (rnn): LSTM(100, 1024)
316
- (decoder): Linear(in_features=1024, out_features=275, bias=True)
317
  )
318
- )
319
- (list_embedding_2): FlairEmbeddings(
320
- (lm): LanguageModel(
321
- (drop): Dropout(p=0.5, inplace=False)
322
- (encoder): Embedding(275, 100)
323
- (rnn): LSTM(100, 1024)
324
- (decoder): Linear(in_features=1024, out_features=275, bias=True)
325
  )
326
  )
327
  )
 
328
  (word_dropout): WordDropout(p=0.05)
329
  (locked_dropout): LockedDropout(p=0.5)
330
- (embedding2nn): Linear(in_features=2816, out_features=2816, bias=True)
331
- (linear): Linear(in_features=2816, out_features=13, bias=True)
332
  (loss_function): CrossEntropyLoss()
333
  )"
334
- 2022-10-01 00:23:25,114 ----------------------------------------------------------------------------------------------------
335
- 2022-10-01 00:23:25,115 Corpus: "Corpus: 70000 train + 15000 dev + 15000 test sentences"
336
- 2022-10-01 00:23:25,115 ----------------------------------------------------------------------------------------------------
337
- 2022-10-01 00:23:25,115 Parameters:
338
- 2022-10-01 00:23:25,116 - learning_rate: "0.010000"
339
- 2022-10-01 00:23:25,116 - mini_batch_size: "8"
340
- 2022-10-01 00:23:25,116 - patience: "3"
341
- 2022-10-01 00:23:25,116 - anneal_factor: "0.5"
342
- 2022-10-01 00:23:25,116 - max_epochs: "2"
343
- 2022-10-01 00:23:25,116 - shuffle: "True"
344
- 2022-10-01 00:23:25,117 - train_with_dev: "False"
345
- 2022-10-01 00:23:25,117 - batch_growth_annealing: "False"
346
- 2022-10-01 00:23:25,117 ----------------------------------------------------------------------------------------------------
347
- 2022-10-01 00:23:25,117 Model training base path: "c:\Users\Ivan\Documents\Projects\Yoda\NER\model\flair\src\..\models\mix_trans_word"
348
- 2022-10-01 00:23:25,117 ----------------------------------------------------------------------------------------------------
349
- 2022-10-01 00:23:25,118 Device: cuda:0
350
- 2022-10-01 00:23:25,118 ----------------------------------------------------------------------------------------------------
351
- 2022-10-01 00:23:25,118 Embeddings storage mode: cpu
352
- 2022-10-01 00:23:25,119 ----------------------------------------------------------------------------------------------------
353
- 2022-10-01 00:25:10,652 epoch 1 - iter 875/8750 - loss 0.52734710 - samples/sec: 66.36 - lr: 0.010000
354
- 2022-10-01 00:26:56,050 epoch 1 - iter 1750/8750 - loss 0.40571165 - samples/sec: 66.45 - lr: 0.010000
355
- 2022-10-01 00:28:42,758 epoch 1 - iter 2625/8750 - loss 0.33981350 - samples/sec: 65.63 - lr: 0.010000
356
- 2022-10-01 00:30:27,826 epoch 1 - iter 3500/8750 - loss 0.29553411 - samples/sec: 66.66 - lr: 0.010000
357
- 2022-10-01 00:32:13,605 epoch 1 - iter 4375/8750 - loss 0.26472648 - samples/sec: 66.21 - lr: 0.010000
358
- 2022-10-01 00:33:58,962 epoch 1 - iter 5250/8750 - loss 0.24119392 - samples/sec: 66.47 - lr: 0.010000
359
- 2022-10-01 00:35:44,264 epoch 1 - iter 6125/8750 - loss 0.22350560 - samples/sec: 66.50 - lr: 0.010000
360
- 2022-10-01 00:37:29,676 epoch 1 - iter 7000/8750 - loss 0.20938707 - samples/sec: 66.43 - lr: 0.010000
361
- 2022-10-01 00:39:17,828 epoch 1 - iter 7875/8750 - loss 0.19801233 - samples/sec: 64.75 - lr: 0.010000
362
- 2022-10-01 00:41:05,621 epoch 1 - iter 8750/8750 - loss 0.18900810 - samples/sec: 64.98 - lr: 0.010000
363
- 2022-10-01 00:41:05,624 ----------------------------------------------------------------------------------------------------
364
- 2022-10-01 00:41:05,624 EPOCH 1 done: loss 0.1890 - lr 0.010000
365
- 2022-10-01 00:43:16,083 Evaluating as a multi-label problem: False
366
- 2022-10-01 00:43:16,227 DEV : loss 0.06317088007926941 - f1-score (micro avg) 0.9585
367
- 2022-10-01 00:43:17,308 BAD EPOCHS (no improvement): 0
368
- 2022-10-01 00:43:17,309 saving best model
369
- 2022-10-01 00:43:18,885 ----------------------------------------------------------------------------------------------------
370
- 2022-10-01 00:45:00,373 epoch 2 - iter 875/8750 - loss 0.09938527 - samples/sec: 69.02 - lr: 0.010000
371
- 2022-10-01 00:46:39,918 epoch 2 - iter 1750/8750 - loss 0.09782604 - samples/sec: 70.36 - lr: 0.010000
372
- 2022-10-01 00:48:19,288 epoch 2 - iter 2625/8750 - loss 0.09732946 - samples/sec: 70.50 - lr: 0.010000
373
- 2022-10-01 00:49:56,913 epoch 2 - iter 3500/8750 - loss 0.09652202 - samples/sec: 71.76 - lr: 0.010000
374
- 2022-10-01 00:51:35,781 epoch 2 - iter 4375/8750 - loss 0.09592801 - samples/sec: 70.86 - lr: 0.010000
375
- 2022-10-01 00:53:12,838 epoch 2 - iter 5250/8750 - loss 0.09478132 - samples/sec: 72.17 - lr: 0.010000
376
- 2022-10-01 00:54:49,247 epoch 2 - iter 6125/8750 - loss 0.09405506 - samples/sec: 72.65 - lr: 0.010000
377
- 2022-10-01 00:56:26,656 epoch 2 - iter 7000/8750 - loss 0.09270363 - samples/sec: 71.90 - lr: 0.010000
378
- 2022-10-01 00:58:04,050 epoch 2 - iter 7875/8750 - loss 0.09222568 - samples/sec: 71.92 - lr: 0.010000
379
- 2022-10-01 00:59:41,351 epoch 2 - iter 8750/8750 - loss 0.09155321 - samples/sec: 71.98 - lr: 0.010000
380
- 2022-10-01 00:59:41,359 ----------------------------------------------------------------------------------------------------
381
- 2022-10-01 00:59:41,360 EPOCH 2 done: loss 0.0916 - lr 0.010000
382
- 2022-10-01 01:01:38,941 Evaluating as a multi-label problem: False
383
- 2022-10-01 01:01:39,054 DEV : loss 0.04371843859553337 - f1-score (micro avg) 0.9749
384
- 2022-10-01 01:01:40,056 BAD EPOCHS (no improvement): 0
385
- 2022-10-01 01:01:40,058 saving best model
386
- 2022-10-01 01:01:42,979 ----------------------------------------------------------------------------------------------------
387
- 2022-10-01 01:01:42,986 loading file c:\Users\Ivan\Documents\Projects\Yoda\NER\model\flair\src\..\models\mix_trans_word\best-model.pt
388
- 2022-10-01 01:01:46,879 SequenceTagger predicts: Dictionary with 13 tags: O, S-brand, B-brand, E-brand, I-brand, S-size, B-size, E-size, I-size, S-color, B-color, E-color, I-color
389
- 2022-10-01 01:03:40,258 Evaluating as a multi-label problem: False
390
- 2022-10-01 01:03:40,388 0.9719 0.9777 0.9748 0.951
391
- 2022-10-01 01:03:40,389
392
  Results:
393
- - F-score (micro) 0.9748
394
- - F-score (macro) 0.9624
395
- - Accuracy 0.951
396
 
397
  By class:
398
  precision recall f1-score support
399
 
400
- brand 0.9779 0.9849 0.9814 11779
401
- size 0.9780 0.9821 0.9800 3125
402
- color 0.9249 0.9264 0.9256 1915
403
 
404
- micro avg 0.9719 0.9777 0.9748 16819
405
- macro avg 0.9603 0.9644 0.9624 16819
406
- weighted avg 0.9719 0.9777 0.9748 16819
407
 
408
- 2022-10-01 01:03:40,391 ----------------------------------------------------------------------------------------------------
 
1
+ 2022-10-04 14:07:15,489 ----------------------------------------------------------------------------------------------------
2
+ 2022-10-04 14:07:15,492 Model: "SequenceTagger(
3
+ (embeddings): TransformerWordEmbeddings(
4
+ (model): BertModel(
5
+ (embeddings): BertEmbeddings(
6
+ (word_embeddings): Embedding(119547, 768, padding_idx=0)
7
+ (position_embeddings): Embedding(512, 768)
8
+ (token_type_embeddings): Embedding(2, 768)
9
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
10
+ (dropout): Dropout(p=0.1, inplace=False)
11
+ )
12
+ (encoder): BertEncoder(
13
+ (layer): ModuleList(
14
+ (0): BertLayer(
15
+ (attention): BertAttention(
16
+ (self): BertSelfAttention(
17
+ (query): Linear(in_features=768, out_features=768, bias=True)
18
+ (key): Linear(in_features=768, out_features=768, bias=True)
19
+ (value): Linear(in_features=768, out_features=768, bias=True)
20
+ (dropout): Dropout(p=0.1, inplace=False)
 
 
 
 
 
 
 
 
 
 
 
21
  )
22
+ (output): BertSelfOutput(
23
+ (dense): Linear(in_features=768, out_features=768, bias=True)
24
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
25
  (dropout): Dropout(p=0.1, inplace=False)
26
  )
27
  )
28
+ (intermediate): BertIntermediate(
29
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
30
+ (intermediate_act_fn): GELUActivation()
31
+ )
32
+ (output): BertOutput(
33
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
34
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
35
+ (dropout): Dropout(p=0.1, inplace=False)
36
+ )
37
+ )
38
+ (1): BertLayer(
39
+ (attention): BertAttention(
40
+ (self): BertSelfAttention(
41
+ (query): Linear(in_features=768, out_features=768, bias=True)
42
+ (key): Linear(in_features=768, out_features=768, bias=True)
43
+ (value): Linear(in_features=768, out_features=768, bias=True)
44
+ (dropout): Dropout(p=0.1, inplace=False)
45
  )
46
+ (output): BertSelfOutput(
47
+ (dense): Linear(in_features=768, out_features=768, bias=True)
48
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
49
  (dropout): Dropout(p=0.1, inplace=False)
50
  )
51
  )
52
+ (intermediate): BertIntermediate(
53
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
54
+ (intermediate_act_fn): GELUActivation()
55
+ )
56
+ (output): BertOutput(
57
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
58
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
59
+ (dropout): Dropout(p=0.1, inplace=False)
60
+ )
61
+ )
62
+ (2): BertLayer(
63
+ (attention): BertAttention(
64
+ (self): BertSelfAttention(
65
+ (query): Linear(in_features=768, out_features=768, bias=True)
66
+ (key): Linear(in_features=768, out_features=768, bias=True)
67
+ (value): Linear(in_features=768, out_features=768, bias=True)
68
+ (dropout): Dropout(p=0.1, inplace=False)
69
  )
70
+ (output): BertSelfOutput(
71
+ (dense): Linear(in_features=768, out_features=768, bias=True)
72
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
73
  (dropout): Dropout(p=0.1, inplace=False)
74
  )
75
  )
76
+ (intermediate): BertIntermediate(
77
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
78
+ (intermediate_act_fn): GELUActivation()
79
+ )
80
+ (output): BertOutput(
81
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
82
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
83
+ (dropout): Dropout(p=0.1, inplace=False)
84
+ )
85
+ )
86
+ (3): BertLayer(
87
+ (attention): BertAttention(
88
+ (self): BertSelfAttention(
89
+ (query): Linear(in_features=768, out_features=768, bias=True)
90
+ (key): Linear(in_features=768, out_features=768, bias=True)
91
+ (value): Linear(in_features=768, out_features=768, bias=True)
92
+ (dropout): Dropout(p=0.1, inplace=False)
93
  )
94
+ (output): BertSelfOutput(
95
+ (dense): Linear(in_features=768, out_features=768, bias=True)
96
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
97
  (dropout): Dropout(p=0.1, inplace=False)
98
  )
99
  )
100
+ (intermediate): BertIntermediate(
101
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
102
+ (intermediate_act_fn): GELUActivation()
103
+ )
104
+ (output): BertOutput(
105
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
106
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
107
+ (dropout): Dropout(p=0.1, inplace=False)
108
+ )
109
+ )
110
+ (4): BertLayer(
111
+ (attention): BertAttention(
112
+ (self): BertSelfAttention(
113
+ (query): Linear(in_features=768, out_features=768, bias=True)
114
+ (key): Linear(in_features=768, out_features=768, bias=True)
115
+ (value): Linear(in_features=768, out_features=768, bias=True)
116
+ (dropout): Dropout(p=0.1, inplace=False)
117
  )
118
+ (output): BertSelfOutput(
119
+ (dense): Linear(in_features=768, out_features=768, bias=True)
120
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
121
  (dropout): Dropout(p=0.1, inplace=False)
122
  )
123
  )
124
+ (intermediate): BertIntermediate(
125
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
126
+ (intermediate_act_fn): GELUActivation()
127
+ )
128
+ (output): BertOutput(
129
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
130
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
131
+ (dropout): Dropout(p=0.1, inplace=False)
132
+ )
133
+ )
134
+ (5): BertLayer(
135
+ (attention): BertAttention(
136
+ (self): BertSelfAttention(
137
+ (query): Linear(in_features=768, out_features=768, bias=True)
138
+ (key): Linear(in_features=768, out_features=768, bias=True)
139
+ (value): Linear(in_features=768, out_features=768, bias=True)
140
+ (dropout): Dropout(p=0.1, inplace=False)
141
  )
142
+ (output): BertSelfOutput(
143
+ (dense): Linear(in_features=768, out_features=768, bias=True)
144
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
145
  (dropout): Dropout(p=0.1, inplace=False)
146
  )
147
  )
148
+ (intermediate): BertIntermediate(
149
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
150
+ (intermediate_act_fn): GELUActivation()
151
+ )
152
+ (output): BertOutput(
153
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
154
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
155
+ (dropout): Dropout(p=0.1, inplace=False)
156
+ )
157
+ )
158
+ (6): BertLayer(
159
+ (attention): BertAttention(
160
+ (self): BertSelfAttention(
161
+ (query): Linear(in_features=768, out_features=768, bias=True)
162
+ (key): Linear(in_features=768, out_features=768, bias=True)
163
+ (value): Linear(in_features=768, out_features=768, bias=True)
164
+ (dropout): Dropout(p=0.1, inplace=False)
165
  )
166
+ (output): BertSelfOutput(
167
+ (dense): Linear(in_features=768, out_features=768, bias=True)
168
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
169
  (dropout): Dropout(p=0.1, inplace=False)
170
  )
171
  )
172
+ (intermediate): BertIntermediate(
173
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
174
+ (intermediate_act_fn): GELUActivation()
175
+ )
176
+ (output): BertOutput(
177
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
178
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
179
+ (dropout): Dropout(p=0.1, inplace=False)
180
+ )
181
+ )
182
+ (7): BertLayer(
183
+ (attention): BertAttention(
184
+ (self): BertSelfAttention(
185
+ (query): Linear(in_features=768, out_features=768, bias=True)
186
+ (key): Linear(in_features=768, out_features=768, bias=True)
187
+ (value): Linear(in_features=768, out_features=768, bias=True)
188
+ (dropout): Dropout(p=0.1, inplace=False)
189
  )
190
+ (output): BertSelfOutput(
191
+ (dense): Linear(in_features=768, out_features=768, bias=True)
192
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
193
  (dropout): Dropout(p=0.1, inplace=False)
194
  )
195
  )
196
+ (intermediate): BertIntermediate(
197
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
198
+ (intermediate_act_fn): GELUActivation()
199
+ )
200
+ (output): BertOutput(
201
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
202
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
203
+ (dropout): Dropout(p=0.1, inplace=False)
204
+ )
205
+ )
206
+ (8): BertLayer(
207
+ (attention): BertAttention(
208
+ (self): BertSelfAttention(
209
+ (query): Linear(in_features=768, out_features=768, bias=True)
210
+ (key): Linear(in_features=768, out_features=768, bias=True)
211
+ (value): Linear(in_features=768, out_features=768, bias=True)
212
+ (dropout): Dropout(p=0.1, inplace=False)
213
  )
214
+ (output): BertSelfOutput(
215
+ (dense): Linear(in_features=768, out_features=768, bias=True)
216
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
217
  (dropout): Dropout(p=0.1, inplace=False)
218
  )
219
  )
220
+ (intermediate): BertIntermediate(
221
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
222
+ (intermediate_act_fn): GELUActivation()
223
+ )
224
+ (output): BertOutput(
225
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
226
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
227
+ (dropout): Dropout(p=0.1, inplace=False)
228
+ )
229
+ )
230
+ (9): BertLayer(
231
+ (attention): BertAttention(
232
+ (self): BertSelfAttention(
233
+ (query): Linear(in_features=768, out_features=768, bias=True)
234
+ (key): Linear(in_features=768, out_features=768, bias=True)
235
+ (value): Linear(in_features=768, out_features=768, bias=True)
236
+ (dropout): Dropout(p=0.1, inplace=False)
237
  )
238
+ (output): BertSelfOutput(
239
+ (dense): Linear(in_features=768, out_features=768, bias=True)
240
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
241
  (dropout): Dropout(p=0.1, inplace=False)
242
  )
243
  )
244
+ (intermediate): BertIntermediate(
245
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
246
+ (intermediate_act_fn): GELUActivation()
247
+ )
248
+ (output): BertOutput(
249
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
250
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
251
+ (dropout): Dropout(p=0.1, inplace=False)
252
+ )
253
+ )
254
+ (10): BertLayer(
255
+ (attention): BertAttention(
256
+ (self): BertSelfAttention(
257
+ (query): Linear(in_features=768, out_features=768, bias=True)
258
+ (key): Linear(in_features=768, out_features=768, bias=True)
259
+ (value): Linear(in_features=768, out_features=768, bias=True)
260
+ (dropout): Dropout(p=0.1, inplace=False)
261
  )
262
+ (output): BertSelfOutput(
263
+ (dense): Linear(in_features=768, out_features=768, bias=True)
264
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
265
  (dropout): Dropout(p=0.1, inplace=False)
266
  )
267
  )
268
+ (intermediate): BertIntermediate(
269
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
270
+ (intermediate_act_fn): GELUActivation()
271
+ )
272
+ (output): BertOutput(
273
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
274
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
275
+ (dropout): Dropout(p=0.1, inplace=False)
276
+ )
277
+ )
278
+ (11): BertLayer(
279
+ (attention): BertAttention(
280
+ (self): BertSelfAttention(
281
+ (query): Linear(in_features=768, out_features=768, bias=True)
282
+ (key): Linear(in_features=768, out_features=768, bias=True)
283
+ (value): Linear(in_features=768, out_features=768, bias=True)
284
+ (dropout): Dropout(p=0.1, inplace=False)
285
  )
286
+ (output): BertSelfOutput(
287
+ (dense): Linear(in_features=768, out_features=768, bias=True)
288
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
289
  (dropout): Dropout(p=0.1, inplace=False)
290
  )
291
  )
292
+ (intermediate): BertIntermediate(
293
+ (dense): Linear(in_features=768, out_features=3072, bias=True)
294
+ (intermediate_act_fn): GELUActivation()
295
+ )
296
+ (output): BertOutput(
297
+ (dense): Linear(in_features=3072, out_features=768, bias=True)
298
+ (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
299
+ (dropout): Dropout(p=0.1, inplace=False)
300
+ )
301
  )
302
  )
 
 
 
 
 
 
 
 
 
 
 
 
303
  )
304
+ (pooler): BertPooler(
305
+ (dense): Linear(in_features=768, out_features=768, bias=True)
306
+ (activation): Tanh()
 
 
 
 
307
  )
308
  )
309
  )
310
+ (dropout): Dropout(p=0.3, inplace=False)
311
  (word_dropout): WordDropout(p=0.05)
312
  (locked_dropout): LockedDropout(p=0.5)
313
+ (linear): Linear(in_features=768, out_features=13, bias=True)
 
314
  (loss_function): CrossEntropyLoss()
315
  )"
316
+ 2022-10-04 14:07:15,510 ----------------------------------------------------------------------------------------------------
317
+ 2022-10-04 14:07:15,510 Corpus: "Corpus: 70000 train + 15000 dev + 15000 test sentences"
318
+ 2022-10-04 14:07:15,510 ----------------------------------------------------------------------------------------------------
319
+ 2022-10-04 14:07:15,511 Parameters:
320
+ 2022-10-04 14:07:15,511 - learning_rate: "0.010000"
321
+ 2022-10-04 14:07:15,511 - mini_batch_size: "8"
322
+ 2022-10-04 14:07:15,511 - patience: "3"
323
+ 2022-10-04 14:07:15,512 - anneal_factor: "0.5"
324
+ 2022-10-04 14:07:15,512 - max_epochs: "2"
325
+ 2022-10-04 14:07:15,512 - shuffle: "True"
326
+ 2022-10-04 14:07:15,512 - train_with_dev: "False"
327
+ 2022-10-04 14:07:15,513 - batch_growth_annealing: "False"
328
+ 2022-10-04 14:07:15,513 ----------------------------------------------------------------------------------------------------
329
+ 2022-10-04 14:07:15,513 Model training base path: "c:\Users\Ivan\Documents\Projects\Yoda\NER\model\flair\src\..\models\trans_sm_flair"
330
+ 2022-10-04 14:07:15,513 ----------------------------------------------------------------------------------------------------
331
+ 2022-10-04 14:07:15,513 Device: cuda:0
332
+ 2022-10-04 14:07:15,514 ----------------------------------------------------------------------------------------------------
333
+ 2022-10-04 14:07:15,514 Embeddings storage mode: cpu
334
+ 2022-10-04 14:07:15,514 ----------------------------------------------------------------------------------------------------
335
+ 2022-10-04 14:08:50,056 epoch 1 - iter 875/8750 - loss 0.77736243 - samples/sec: 74.10 - lr: 0.010000
336
+ 2022-10-04 14:10:25,613 epoch 1 - iter 1750/8750 - loss 0.58654474 - samples/sec: 73.31 - lr: 0.010000
337
+ 2022-10-04 14:12:00,221 epoch 1 - iter 2625/8750 - loss 0.49473747 - samples/sec: 74.05 - lr: 0.010000
338
+ 2022-10-04 14:13:35,035 epoch 1 - iter 3500/8750 - loss 0.43711232 - samples/sec: 73.87 - lr: 0.010000
339
+ 2022-10-04 14:15:08,344 epoch 1 - iter 4375/8750 - loss 0.39713865 - samples/sec: 75.06 - lr: 0.010000
340
+ 2022-10-04 14:16:41,989 epoch 1 - iter 5250/8750 - loss 0.36731971 - samples/sec: 74.80 - lr: 0.010000
341
+ 2022-10-04 14:18:17,847 epoch 1 - iter 6125/8750 - loss 0.34209381 - samples/sec: 73.07 - lr: 0.010000
342
+ 2022-10-04 14:19:52,115 epoch 1 - iter 7000/8750 - loss 0.32256861 - samples/sec: 74.30 - lr: 0.010000
343
+ 2022-10-04 14:21:26,066 epoch 1 - iter 7875/8750 - loss 0.30596431 - samples/sec: 74.55 - lr: 0.010000
344
+ 2022-10-04 14:23:00,059 epoch 1 - iter 8750/8750 - loss 0.29124524 - samples/sec: 74.51 - lr: 0.010000
345
+ 2022-10-04 14:23:00,061 ----------------------------------------------------------------------------------------------------
346
+ 2022-10-04 14:23:00,062 EPOCH 1 done: loss 0.2912 - lr 0.010000
347
+ 2022-10-04 14:24:52,210 Evaluating as a multi-label problem: False
348
+ 2022-10-04 14:24:52,424 DEV : loss 0.06397613137960434 - f1-score (micro avg) 0.973
349
+ 2022-10-04 14:24:53,223 BAD EPOCHS (no improvement): 0
350
+ 2022-10-04 14:24:54,431 saving best model
351
+ 2022-10-04 14:24:55,749 ----------------------------------------------------------------------------------------------------
352
+ 2022-10-04 14:26:31,875 epoch 2 - iter 875/8750 - loss 0.15239591 - samples/sec: 72.88 - lr: 0.010000
353
+ 2022-10-04 14:28:12,311 epoch 2 - iter 1750/8750 - loss 0.15109719 - samples/sec: 69.74 - lr: 0.010000
354
+ 2022-10-04 14:29:49,414 epoch 2 - iter 2625/8750 - loss 0.15017726 - samples/sec: 72.14 - lr: 0.010000
355
+ 2022-10-04 14:31:22,789 epoch 2 - iter 3500/8750 - loss 0.14709937 - samples/sec: 75.01 - lr: 0.010000
356
+ 2022-10-04 14:32:56,365 epoch 2 - iter 4375/8750 - loss 0.14490590 - samples/sec: 74.87 - lr: 0.010000
357
+ 2022-10-04 14:34:29,769 epoch 2 - iter 5250/8750 - loss 0.14379219 - samples/sec: 75.00 - lr: 0.010000
358
+ 2022-10-04 14:36:04,122 epoch 2 - iter 6125/8750 - loss 0.14272196 - samples/sec: 74.24 - lr: 0.010000
359
+ 2022-10-04 14:37:40,084 epoch 2 - iter 7000/8750 - loss 0.14024151 - samples/sec: 73.00 - lr: 0.010000
360
+ 2022-10-04 14:39:15,077 epoch 2 - iter 7875/8750 - loss 0.13892120 - samples/sec: 73.73 - lr: 0.010000
361
+ 2022-10-04 14:40:48,611 epoch 2 - iter 8750/8750 - loss 0.13731836 - samples/sec: 74.89 - lr: 0.010000
362
+ 2022-10-04 14:40:48,617 ----------------------------------------------------------------------------------------------------
363
+ 2022-10-04 14:40:48,617 EPOCH 2 done: loss 0.1373 - lr 0.010000
364
+ 2022-10-04 14:42:50,048 Evaluating as a multi-label problem: False
365
+ 2022-10-04 14:42:50,277 DEV : loss 0.05747831612825394 - f1-score (micro avg) 0.9844
366
+ 2022-10-04 14:42:51,053 BAD EPOCHS (no improvement): 0
367
+ 2022-10-04 14:42:52,333 saving best model
368
+ 2022-10-04 14:42:54,576 ----------------------------------------------------------------------------------------------------
369
+ 2022-10-04 14:42:54,600 loading file c:\Users\Ivan\Documents\Projects\Yoda\NER\model\flair\src\..\models\trans_sm_flair\best-model.pt
370
+ 2022-10-04 14:42:57,086 SequenceTagger predicts: Dictionary with 13 tags: O, S-size, B-size, E-size, I-size, S-brand, B-brand, E-brand, I-brand, S-color, B-color, E-color, I-color
371
+ 2022-10-04 14:44:29,459 Evaluating as a multi-label problem: False
372
+ 2022-10-04 14:44:29,668 0.9816 0.9857 0.9837 0.9679
373
+ 2022-10-04 14:44:29,669
374
  Results:
375
+ - F-score (micro) 0.9837
376
+ - F-score (macro) 0.9843
377
+ - Accuracy 0.9679
378
 
379
  By class:
380
  precision recall f1-score support
381
 
382
+ size 0.9820 0.9859 0.9839 17988
383
+ brand 0.9773 0.9860 0.9817 11674
384
+ color 0.9905 0.9840 0.9872 5070
385
 
386
+ micro avg 0.9816 0.9857 0.9837 34732
387
+ macro avg 0.9833 0.9853 0.9843 34732
388
+ weighted avg 0.9816 0.9857 0.9837 34732
389
 
390
+ 2022-10-04 14:44:29,670 ----------------------------------------------------------------------------------------------------