PooryaPiroozfar commited on
Commit
1869f9a
1 Parent(s): 807b87e

Update training.log

Browse files
Files changed (1) hide show
  1. training.log +2 -654
training.log CHANGED
@@ -9,303 +9,11 @@
9
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
10
  (dropout): Dropout(p=0.1, inplace=False)
11
  )
12
- (encoder): BertEncoder(
13
- (layer): ModuleList(
14
- (0): BertLayer(
15
- (attention): BertAttention(
16
- (self): BertSelfAttention(
17
- (query): Linear(in_features=768, out_features=768, bias=True)
18
- (key): Linear(in_features=768, out_features=768, bias=True)
19
- (value): Linear(in_features=768, out_features=768, bias=True)
20
- (dropout): Dropout(p=0.1, inplace=False)
21
- )
22
- (output): BertSelfOutput(
23
- (dense): Linear(in_features=768, out_features=768, bias=True)
24
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
25
- (dropout): Dropout(p=0.1, inplace=False)
26
- )
27
- )
28
- (intermediate): BertIntermediate(
29
- (dense): Linear(in_features=768, out_features=3072, bias=True)
30
- (intermediate_act_fn): GELUActivation()
31
- )
32
- (output): BertOutput(
33
- (dense): Linear(in_features=3072, out_features=768, bias=True)
34
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
35
- (dropout): Dropout(p=0.1, inplace=False)
36
- )
37
- )
38
- (1): BertLayer(
39
- (attention): BertAttention(
40
- (self): BertSelfAttention(
41
- (query): Linear(in_features=768, out_features=768, bias=True)
42
- (key): Linear(in_features=768, out_features=768, bias=True)
43
- (value): Linear(in_features=768, out_features=768, bias=True)
44
- (dropout): Dropout(p=0.1, inplace=False)
45
- )
46
- (output): BertSelfOutput(
47
- (dense): Linear(in_features=768, out_features=768, bias=True)
48
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
49
- (dropout): Dropout(p=0.1, inplace=False)
50
- )
51
- )
52
- (intermediate): BertIntermediate(
53
- (dense): Linear(in_features=768, out_features=3072, bias=True)
54
- (intermediate_act_fn): GELUActivation()
55
- )
56
- (output): BertOutput(
57
- (dense): Linear(in_features=3072, out_features=768, bias=True)
58
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
59
- (dropout): Dropout(p=0.1, inplace=False)
60
- )
61
- )
62
- (2): BertLayer(
63
- (attention): BertAttention(
64
- (self): BertSelfAttention(
65
- (query): Linear(in_features=768, out_features=768, bias=True)
66
- (key): Linear(in_features=768, out_features=768, bias=True)
67
- (value): Linear(in_features=768, out_features=768, bias=True)
68
- (dropout): Dropout(p=0.1, inplace=False)
69
- )
70
- (output): BertSelfOutput(
71
- (dense): Linear(in_features=768, out_features=768, bias=True)
72
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
73
- (dropout): Dropout(p=0.1, inplace=False)
74
- )
75
- )
76
- (intermediate): BertIntermediate(
77
- (dense): Linear(in_features=768, out_features=3072, bias=True)
78
- (intermediate_act_fn): GELUActivation()
79
- )
80
- (output): BertOutput(
81
- (dense): Linear(in_features=3072, out_features=768, bias=True)
82
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
83
- (dropout): Dropout(p=0.1, inplace=False)
84
- )
85
- )
86
- (3): BertLayer(
87
- (attention): BertAttention(
88
- (self): BertSelfAttention(
89
- (query): Linear(in_features=768, out_features=768, bias=True)
90
- (key): Linear(in_features=768, out_features=768, bias=True)
91
- (value): Linear(in_features=768, out_features=768, bias=True)
92
- (dropout): Dropout(p=0.1, inplace=False)
93
- )
94
- (output): BertSelfOutput(
95
- (dense): Linear(in_features=768, out_features=768, bias=True)
96
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
97
- (dropout): Dropout(p=0.1, inplace=False)
98
- )
99
- )
100
- (intermediate): BertIntermediate(
101
- (dense): Linear(in_features=768, out_features=3072, bias=True)
102
- (intermediate_act_fn): GELUActivation()
103
- )
104
- (output): BertOutput(
105
- (dense): Linear(in_features=3072, out_features=768, bias=True)
106
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
107
- (dropout): Dropout(p=0.1, inplace=False)
108
- )
109
- )
110
- (4): BertLayer(
111
- (attention): BertAttention(
112
- (self): BertSelfAttention(
113
- (query): Linear(in_features=768, out_features=768, bias=True)
114
- (key): Linear(in_features=768, out_features=768, bias=True)
115
- (value): Linear(in_features=768, out_features=768, bias=True)
116
- (dropout): Dropout(p=0.1, inplace=False)
117
- )
118
- (output): BertSelfOutput(
119
- (dense): Linear(in_features=768, out_features=768, bias=True)
120
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
121
- (dropout): Dropout(p=0.1, inplace=False)
122
- )
123
- )
124
- (intermediate): BertIntermediate(
125
- (dense): Linear(in_features=768, out_features=3072, bias=True)
126
- (intermediate_act_fn): GELUActivation()
127
- )
128
- (output): BertOutput(
129
- (dense): Linear(in_features=3072, out_features=768, bias=True)
130
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
131
- (dropout): Dropout(p=0.1, inplace=False)
132
- )
133
- )
134
- (5): BertLayer(
135
- (attention): BertAttention(
136
- (self): BertSelfAttention(
137
- (query): Linear(in_features=768, out_features=768, bias=True)
138
- (key): Linear(in_features=768, out_features=768, bias=True)
139
- (value): Linear(in_features=768, out_features=768, bias=True)
140
- (dropout): Dropout(p=0.1, inplace=False)
141
- )
142
- (output): BertSelfOutput(
143
- (dense): Linear(in_features=768, out_features=768, bias=True)
144
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
145
- (dropout): Dropout(p=0.1, inplace=False)
146
- )
147
- )
148
- (intermediate): BertIntermediate(
149
- (dense): Linear(in_features=768, out_features=3072, bias=True)
150
- (intermediate_act_fn): GELUActivation()
151
- )
152
- (output): BertOutput(
153
- (dense): Linear(in_features=3072, out_features=768, bias=True)
154
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
155
- (dropout): Dropout(p=0.1, inplace=False)
156
- )
157
- )
158
- (6): BertLayer(
159
- (attention): BertAttention(
160
- (self): BertSelfAttention(
161
- (query): Linear(in_features=768, out_features=768, bias=True)
162
- (key): Linear(in_features=768, out_features=768, bias=True)
163
- (value): Linear(in_features=768, out_features=768, bias=True)
164
- (dropout): Dropout(p=0.1, inplace=False)
165
- )
166
- (output): BertSelfOutput(
167
- (dense): Linear(in_features=768, out_features=768, bias=True)
168
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
169
- (dropout): Dropout(p=0.1, inplace=False)
170
- )
171
- )
172
- (intermediate): BertIntermediate(
173
- (dense): Linear(in_features=768, out_features=3072, bias=True)
174
- (intermediate_act_fn): GELUActivation()
175
- )
176
- (output): BertOutput(
177
- (dense): Linear(in_features=3072, out_features=768, bias=True)
178
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
179
- (dropout): Dropout(p=0.1, inplace=False)
180
- )
181
- )
182
- (7): BertLayer(
183
- (attention): BertAttention(
184
- (self): BertSelfAttention(
185
- (query): Linear(in_features=768, out_features=768, bias=True)
186
- (key): Linear(in_features=768, out_features=768, bias=True)
187
- (value): Linear(in_features=768, out_features=768, bias=True)
188
- (dropout): Dropout(p=0.1, inplace=False)
189
- )
190
- (output): BertSelfOutput(
191
- (dense): Linear(in_features=768, out_features=768, bias=True)
192
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
193
- (dropout): Dropout(p=0.1, inplace=False)
194
- )
195
- )
196
- (intermediate): BertIntermediate(
197
- (dense): Linear(in_features=768, out_features=3072, bias=True)
198
- (intermediate_act_fn): GELUActivation()
199
- )
200
- (output): BertOutput(
201
- (dense): Linear(in_features=3072, out_features=768, bias=True)
202
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
203
- (dropout): Dropout(p=0.1, inplace=False)
204
- )
205
- )
206
- (8): BertLayer(
207
- (attention): BertAttention(
208
- (self): BertSelfAttention(
209
- (query): Linear(in_features=768, out_features=768, bias=True)
210
- (key): Linear(in_features=768, out_features=768, bias=True)
211
- (value): Linear(in_features=768, out_features=768, bias=True)
212
- (dropout): Dropout(p=0.1, inplace=False)
213
- )
214
- (output): BertSelfOutput(
215
- (dense): Linear(in_features=768, out_features=768, bias=True)
216
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
217
- (dropout): Dropout(p=0.1, inplace=False)
218
- )
219
- )
220
- (intermediate): BertIntermediate(
221
- (dense): Linear(in_features=768, out_features=3072, bias=True)
222
- (intermediate_act_fn): GELUActivation()
223
- )
224
- (output): BertOutput(
225
- (dense): Linear(in_features=3072, out_features=768, bias=True)
226
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
227
- (dropout): Dropout(p=0.1, inplace=False)
228
- )
229
- )
230
- (9): BertLayer(
231
- (attention): BertAttention(
232
- (self): BertSelfAttention(
233
- (query): Linear(in_features=768, out_features=768, bias=True)
234
- (key): Linear(in_features=768, out_features=768, bias=True)
235
- (value): Linear(in_features=768, out_features=768, bias=True)
236
- (dropout): Dropout(p=0.1, inplace=False)
237
- )
238
- (output): BertSelfOutput(
239
- (dense): Linear(in_features=768, out_features=768, bias=True)
240
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
241
- (dropout): Dropout(p=0.1, inplace=False)
242
- )
243
- )
244
- (intermediate): BertIntermediate(
245
- (dense): Linear(in_features=768, out_features=3072, bias=True)
246
- (intermediate_act_fn): GELUActivation()
247
- )
248
- (output): BertOutput(
249
- (dense): Linear(in_features=3072, out_features=768, bias=True)
250
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
251
- (dropout): Dropout(p=0.1, inplace=False)
252
- )
253
- )
254
- (10): BertLayer(
255
- (attention): BertAttention(
256
- (self): BertSelfAttention(
257
- (query): Linear(in_features=768, out_features=768, bias=True)
258
- (key): Linear(in_features=768, out_features=768, bias=True)
259
- (value): Linear(in_features=768, out_features=768, bias=True)
260
- (dropout): Dropout(p=0.1, inplace=False)
261
- )
262
- (output): BertSelfOutput(
263
- (dense): Linear(in_features=768, out_features=768, bias=True)
264
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
265
- (dropout): Dropout(p=0.1, inplace=False)
266
- )
267
- )
268
- (intermediate): BertIntermediate(
269
- (dense): Linear(in_features=768, out_features=3072, bias=True)
270
- (intermediate_act_fn): GELUActivation()
271
- )
272
- (output): BertOutput(
273
- (dense): Linear(in_features=3072, out_features=768, bias=True)
274
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
275
- (dropout): Dropout(p=0.1, inplace=False)
276
- )
277
- )
278
- (11): BertLayer(
279
- (attention): BertAttention(
280
- (self): BertSelfAttention(
281
- (query): Linear(in_features=768, out_features=768, bias=True)
282
- (key): Linear(in_features=768, out_features=768, bias=True)
283
- (value): Linear(in_features=768, out_features=768, bias=True)
284
- (dropout): Dropout(p=0.1, inplace=False)
285
- )
286
- (output): BertSelfOutput(
287
- (dense): Linear(in_features=768, out_features=768, bias=True)
288
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
289
- (dropout): Dropout(p=0.1, inplace=False)
290
- )
291
- )
292
- (intermediate): BertIntermediate(
293
- (dense): Linear(in_features=768, out_features=3072, bias=True)
294
- (intermediate_act_fn): GELUActivation()
295
- )
296
- (output): BertOutput(
297
- (dense): Linear(in_features=3072, out_features=768, bias=True)
298
- (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
299
- (dropout): Dropout(p=0.1, inplace=False)
300
- )
301
- )
302
- )
303
- )
304
  (pooler): BertPooler(
305
  (dense): Linear(in_features=768, out_features=768, bias=True)
306
  (activation): Tanh()
307
  )
308
- )
309
  )
310
  (word_dropout): WordDropout(p=0.05)
311
  (locked_dropout): LockedDropout(p=0.5)
@@ -327,371 +35,11 @@
327
  2023-01-08 08:23:21,514 - train_with_dev: "False"
328
  2023-01-08 08:23:21,515 - batch_growth_annealing: "False"
329
  2023-01-08 08:23:21,516 ----------------------------------------------------------------------------------------------------
330
- 2023-01-08 08:23:21,517 Model training base path: "resources/taggers/nsurl_512_000_noshuffle_15epoch"
331
  2023-01-08 08:23:21,518 ----------------------------------------------------------------------------------------------------
332
  2023-01-08 08:23:21,519 Device: cuda:0
333
  2023-01-08 08:23:21,519 ----------------------------------------------------------------------------------------------------
334
  2023-01-08 08:23:21,520 Embeddings storage mode: none
335
- 2023-01-08 08:23:21,523 ----------------------------------------------------------------------------------------------------
336
- 2023-01-08 08:25:35,073 epoch 1 - iter 652/6529 - loss 2.76353314 - samples/sec: 19.54 - lr: 0.000000
337
- 2023-01-08 08:27:43,826 epoch 1 - iter 1304/6529 - loss 2.10468350 - samples/sec: 20.26 - lr: 0.000000
338
- 2023-01-08 08:29:54,350 epoch 1 - iter 1956/6529 - loss 1.67857681 - samples/sec: 19.99 - lr: 0.000001
339
- 2023-01-08 08:32:06,509 epoch 1 - iter 2608/6529 - loss 1.40096638 - samples/sec: 19.74 - lr: 0.000001
340
- 2023-01-08 08:34:21,272 epoch 1 - iter 3260/6529 - loss 1.20354192 - samples/sec: 19.36 - lr: 0.000001
341
- 2023-01-08 08:36:35,474 epoch 1 - iter 3912/6529 - loss 1.06876365 - samples/sec: 19.44 - lr: 0.000001
342
- 2023-01-08 08:38:49,910 epoch 1 - iter 4564/6529 - loss 0.96565944 - samples/sec: 19.41 - lr: 0.000001
343
- 2023-01-08 08:41:02,964 epoch 1 - iter 5216/6529 - loss 0.89404243 - samples/sec: 19.61 - lr: 0.000002
344
- 2023-01-08 08:43:17,807 epoch 1 - iter 5868/6529 - loss 0.82707116 - samples/sec: 19.35 - lr: 0.000002
345
- 2023-01-08 08:45:30,584 epoch 1 - iter 6520/6529 - loss 0.77020616 - samples/sec: 19.65 - lr: 0.000002
346
- 2023-01-08 08:45:32,301 ----------------------------------------------------------------------------------------------------
347
- 2023-01-08 08:45:32,303 EPOCH 1 done: loss 0.7697 - lr 0.0000020
348
- 2023-01-08 08:46:43,903 DEV : loss 0.14591681957244873 - f1-score (micro avg) 0.6513
349
- 2023-01-08 08:46:43,968 BAD EPOCHS (no improvement): 4
350
- 2023-01-08 08:46:43,969 ----------------------------------------------------------------------------------------------------
351
- 2023-01-08 08:49:00,303 epoch 2 - iter 652/6529 - loss 0.26767246 - samples/sec: 19.14 - lr: 0.000002
352
- 2023-01-08 08:51:14,930 epoch 2 - iter 1304/6529 - loss 0.26032050 - samples/sec: 19.38 - lr: 0.000002
353
- 2023-01-08 08:53:24,163 epoch 2 - iter 1956/6529 - loss 0.25277047 - samples/sec: 20.19 - lr: 0.000003
354
- 2023-01-08 08:55:38,396 epoch 2 - iter 2608/6529 - loss 0.25357495 - samples/sec: 19.44 - lr: 0.000003
355
- 2023-01-08 08:57:51,341 epoch 2 - iter 3260/6529 - loss 0.25742882 - samples/sec: 19.63 - lr: 0.000003
356
- 2023-01-08 09:00:05,181 epoch 2 - iter 3912/6529 - loss 0.25759538 - samples/sec: 19.49 - lr: 0.000003
357
- 2023-01-08 09:02:19,914 epoch 2 - iter 4564/6529 - loss 0.25655432 - samples/sec: 19.37 - lr: 0.000003
358
- 2023-01-08 09:04:32,472 epoch 2 - iter 5216/6529 - loss 0.25512109 - samples/sec: 19.68 - lr: 0.000004
359
- 2023-01-08 09:06:45,546 epoch 2 - iter 5868/6529 - loss 0.25205262 - samples/sec: 19.61 - lr: 0.000004
360
- 2023-01-08 09:08:59,435 epoch 2 - iter 6520/6529 - loss 0.24897569 - samples/sec: 19.49 - lr: 0.000004
361
- 2023-01-08 09:09:01,160 ----------------------------------------------------------------------------------------------------
362
- 2023-01-08 09:09:01,163 EPOCH 2 done: loss 0.2490 - lr 0.0000040
363
- 2023-01-08 09:10:14,132 DEV : loss 0.09620723873376846 - f1-score (micro avg) 0.8018
364
- 2023-01-08 09:10:14,175 BAD EPOCHS (no improvement): 4
365
- 2023-01-08 09:10:14,176 ----------------------------------------------------------------------------------------------------
366
- 2023-01-08 09:12:30,516 epoch 3 - iter 652/6529 - loss 0.21670855 - samples/sec: 19.14 - lr: 0.000004
367
- 2023-01-08 09:14:44,527 epoch 3 - iter 1304/6529 - loss 0.21922916 - samples/sec: 19.47 - lr: 0.000004
368
- 2023-01-08 09:16:56,659 epoch 3 - iter 1956/6529 - loss 0.21763112 - samples/sec: 19.75 - lr: 0.000005
369
- 2023-01-08 09:19:11,830 epoch 3 - iter 2608/6529 - loss 0.22018173 - samples/sec: 19.30 - lr: 0.000005
370
- 2023-01-08 09:21:25,549 epoch 3 - iter 3260/6529 - loss 0.22296623 - samples/sec: 19.51 - lr: 0.000005
371
- 2023-01-08 09:23:49,595 epoch 3 - iter 3912/6529 - loss 0.22464455 - samples/sec: 18.11 - lr: 0.000005
372
- 2023-01-08 09:26:08,249 epoch 3 - iter 4564/6529 - loss 0.22413709 - samples/sec: 18.82 - lr: 0.000005
373
- 2023-01-08 09:28:21,561 epoch 3 - iter 5216/6529 - loss 0.22271140 - samples/sec: 19.57 - lr: 0.000005
374
- 2023-01-08 09:30:39,450 epoch 3 - iter 5868/6529 - loss 0.22078188 - samples/sec: 18.92 - lr: 0.000005
375
- 2023-01-08 09:32:56,931 epoch 3 - iter 6520/6529 - loss 0.21923857 - samples/sec: 18.98 - lr: 0.000005
376
- 2023-01-08 09:32:58,863 ----------------------------------------------------------------------------------------------------
377
- 2023-01-08 09:32:58,865 EPOCH 3 done: loss 0.2193 - lr 0.0000049
378
- 2023-01-08 09:34:11,869 DEV : loss 0.08930665999650955 - f1-score (micro avg) 0.8357
379
- 2023-01-08 09:34:11,911 BAD EPOCHS (no improvement): 4
380
- 2023-01-08 09:34:11,912 ----------------------------------------------------------------------------------------------------
381
- 2023-01-08 09:36:27,376 epoch 4 - iter 652/6529 - loss 0.19632441 - samples/sec: 19.26 - lr: 0.000005
382
- 2023-01-08 09:38:41,808 epoch 4 - iter 1304/6529 - loss 0.19654954 - samples/sec: 19.41 - lr: 0.000005
383
- 2023-01-08 09:40:53,083 epoch 4 - iter 1956/6529 - loss 0.19641485 - samples/sec: 19.88 - lr: 0.000005
384
- 2023-01-08 09:43:06,935 epoch 4 - iter 2608/6529 - loss 0.19908824 - samples/sec: 19.49 - lr: 0.000005
385
- 2023-01-08 09:45:22,775 epoch 4 - iter 3260/6529 - loss 0.20233334 - samples/sec: 19.21 - lr: 0.000005
386
- 2023-01-08 09:47:38,337 epoch 4 - iter 3912/6529 - loss 0.20352574 - samples/sec: 19.25 - lr: 0.000005
387
- 2023-01-08 09:49:52,733 epoch 4 - iter 4564/6529 - loss 0.20279599 - samples/sec: 19.41 - lr: 0.000005
388
- 2023-01-08 09:52:08,210 epoch 4 - iter 5216/6529 - loss 0.20192930 - samples/sec: 19.26 - lr: 0.000005
389
- 2023-01-08 09:54:24,632 epoch 4 - iter 5868/6529 - loss 0.20036623 - samples/sec: 19.13 - lr: 0.000005
390
- 2023-01-08 09:56:39,471 epoch 4 - iter 6520/6529 - loss 0.19916323 - samples/sec: 19.35 - lr: 0.000005
391
- 2023-01-08 09:56:41,133 ----------------------------------------------------------------------------------------------------
392
- 2023-01-08 09:56:41,136 EPOCH 4 done: loss 0.1992 - lr 0.0000047
393
- 2023-01-08 09:57:57,145 DEV : loss 0.0921374261379242 - f1-score (micro avg) 0.8614
394
- 2023-01-08 09:57:57,193 BAD EPOCHS (no improvement): 4
395
- 2023-01-08 09:57:57,195 ----------------------------------------------------------------------------------------------------
396
- 2023-01-08 10:00:12,637 epoch 5 - iter 652/6529 - loss 0.17778101 - samples/sec: 19.26 - lr: 0.000005
397
- 2023-01-08 10:02:28,175 epoch 5 - iter 1304/6529 - loss 0.18126676 - samples/sec: 19.25 - lr: 0.000005
398
- 2023-01-08 10:04:43,855 epoch 5 - iter 1956/6529 - loss 0.18348900 - samples/sec: 19.23 - lr: 0.000005
399
- 2023-01-08 10:06:58,958 epoch 5 - iter 2608/6529 - loss 0.18486018 - samples/sec: 19.31 - lr: 0.000005
400
- 2023-01-08 10:09:15,265 epoch 5 - iter 3260/6529 - loss 0.18834373 - samples/sec: 19.14 - lr: 0.000005
401
- 2023-01-08 10:11:31,435 epoch 5 - iter 3912/6529 - loss 0.18964518 - samples/sec: 19.16 - lr: 0.000005
402
- 2023-01-08 10:13:46,637 epoch 5 - iter 4564/6529 - loss 0.18928872 - samples/sec: 19.30 - lr: 0.000005
403
- 2023-01-08 10:16:01,327 epoch 5 - iter 5216/6529 - loss 0.18879883 - samples/sec: 19.37 - lr: 0.000004
404
- 2023-01-08 10:18:16,826 epoch 5 - iter 5868/6529 - loss 0.18708286 - samples/sec: 19.26 - lr: 0.000004
405
- 2023-01-08 10:20:34,164 epoch 5 - iter 6520/6529 - loss 0.18582796 - samples/sec: 19.00 - lr: 0.000004
406
- 2023-01-08 10:20:36,025 ----------------------------------------------------------------------------------------------------
407
- 2023-01-08 10:20:36,028 EPOCH 5 done: loss 0.1859 - lr 0.0000044
408
- 2023-01-08 10:21:52,956 DEV : loss 0.09580960869789124 - f1-score (micro avg) 0.8699
409
- 2023-01-08 10:21:53,002 BAD EPOCHS (no improvement): 4
410
- 2023-01-08 10:21:53,004 ----------------------------------------------------------------------------------------------------
411
- 2023-01-08 10:24:09,733 epoch 6 - iter 652/6529 - loss 0.17521655 - samples/sec: 19.08 - lr: 0.000004
412
- 2023-01-08 10:26:21,528 epoch 6 - iter 1304/6529 - loss 0.17424610 - samples/sec: 19.80 - lr: 0.000004
413
- 2023-01-08 10:28:34,945 epoch 6 - iter 1956/6529 - loss 0.17396923 - samples/sec: 19.56 - lr: 0.000004
414
- 2023-01-08 10:30:49,825 epoch 6 - iter 2608/6529 - loss 0.17528702 - samples/sec: 19.34 - lr: 0.000004
415
- 2023-01-08 10:33:06,765 epoch 6 - iter 3260/6529 - loss 0.17777277 - samples/sec: 19.05 - lr: 0.000004
416
- 2023-01-08 10:35:23,591 epoch 6 - iter 3912/6529 - loss 0.17930874 - samples/sec: 19.07 - lr: 0.000004
417
- 2023-01-08 10:37:41,441 epoch 6 - iter 4564/6529 - loss 0.17885569 - samples/sec: 18.93 - lr: 0.000004
418
- 2023-01-08 10:39:56,482 epoch 6 - iter 5216/6529 - loss 0.17822966 - samples/sec: 19.32 - lr: 0.000004
419
- 2023-01-08 10:42:11,452 epoch 6 - iter 5868/6529 - loss 0.17761229 - samples/sec: 19.33 - lr: 0.000004
420
- 2023-01-08 10:44:29,082 epoch 6 - iter 6520/6529 - loss 0.17612404 - samples/sec: 18.96 - lr: 0.000004
421
- 2023-01-08 10:44:31,061 ----------------------------------------------------------------------------------------------------
422
- 2023-01-08 10:44:31,063 EPOCH 6 done: loss 0.1762 - lr 0.0000042
423
- 2023-01-08 10:45:47,791 DEV : loss 0.10489046573638916 - f1-score (micro avg) 0.8826
424
- 2023-01-08 10:45:47,842 BAD EPOCHS (no improvement): 4
425
- 2023-01-08 10:45:47,844 ----------------------------------------------------------------------------------------------------
426
- 2023-01-08 10:48:01,767 epoch 7 - iter 652/6529 - loss 0.16234851 - samples/sec: 19.48 - lr: 0.000004
427
- 2023-01-08 10:50:17,317 epoch 7 - iter 1304/6529 - loss 0.16401460 - samples/sec: 19.25 - lr: 0.000004
428
- 2023-01-08 10:52:30,318 epoch 7 - iter 1956/6529 - loss 0.16447709 - samples/sec: 19.62 - lr: 0.000004
429
- 2023-01-08 10:54:46,762 epoch 7 - iter 2608/6529 - loss 0.16559102 - samples/sec: 19.12 - lr: 0.000004
430
- 2023-01-08 10:57:04,951 epoch 7 - iter 3260/6529 - loss 0.16837598 - samples/sec: 18.88 - lr: 0.000004
431
- 2023-01-08 10:59:20,546 epoch 7 - iter 3912/6529 - loss 0.17080542 - samples/sec: 19.24 - lr: 0.000004
432
- 2023-01-08 11:01:39,182 epoch 7 - iter 4564/6529 - loss 0.17015294 - samples/sec: 18.82 - lr: 0.000004
433
- 2023-01-08 11:03:54,395 epoch 7 - iter 5216/6529 - loss 0.16971769 - samples/sec: 19.30 - lr: 0.000004
434
- 2023-01-08 11:06:10,242 epoch 7 - iter 5868/6529 - loss 0.16883507 - samples/sec: 19.21 - lr: 0.000004
435
- 2023-01-08 11:08:28,889 epoch 7 - iter 6520/6529 - loss 0.16791804 - samples/sec: 18.82 - lr: 0.000004
436
- 2023-01-08 11:08:30,777 ----------------------------------------------------------------------------------------------------
437
- 2023-01-08 11:08:30,780 EPOCH 7 done: loss 0.1679 - lr 0.0000040
438
- 2023-01-08 11:09:46,112 DEV : loss 0.10590970516204834 - f1-score (micro avg) 0.892
439
- 2023-01-08 11:09:46,164 BAD EPOCHS (no improvement): 4
440
- 2023-01-08 11:09:46,166 ----------------------------------------------------------------------------------------------------
441
- 2023-01-08 11:12:00,934 epoch 8 - iter 652/6529 - loss 0.15312178 - samples/sec: 19.36 - lr: 0.000004
442
- 2023-01-08 11:14:14,666 epoch 8 - iter 1304/6529 - loss 0.15723507 - samples/sec: 19.51 - lr: 0.000004
443
- 2023-01-08 11:16:31,894 epoch 8 - iter 1956/6529 - loss 0.15652843 - samples/sec: 19.01 - lr: 0.000004
444
- 2023-01-08 11:18:46,987 epoch 8 - iter 2608/6529 - loss 0.15826168 - samples/sec: 19.31 - lr: 0.000004
445
- 2023-01-08 11:21:00,593 epoch 8 - iter 3260/6529 - loss 0.16011241 - samples/sec: 19.53 - lr: 0.000004
446
- 2023-01-08 11:23:17,924 epoch 8 - iter 3912/6529 - loss 0.16210808 - samples/sec: 19.00 - lr: 0.000004
447
- 2023-01-08 11:25:34,299 epoch 8 - iter 4564/6529 - loss 0.16260045 - samples/sec: 19.13 - lr: 0.000004
448
- 2023-01-08 11:27:46,535 epoch 8 - iter 5216/6529 - loss 0.16219764 - samples/sec: 19.73 - lr: 0.000004
449
- 2023-01-08 11:30:03,401 epoch 8 - iter 5868/6529 - loss 0.16156175 - samples/sec: 19.06 - lr: 0.000004
450
- 2023-01-08 11:32:17,565 epoch 8 - iter 6520/6529 - loss 0.16053879 - samples/sec: 19.45 - lr: 0.000004
451
- 2023-01-08 11:32:19,391 ----------------------------------------------------------------------------------------------------
452
- 2023-01-08 11:32:19,393 EPOCH 8 done: loss 0.1606 - lr 0.0000038
453
- 2023-01-08 11:33:33,542 DEV : loss 0.10866044461727142 - f1-score (micro avg) 0.889
454
- 2023-01-08 11:33:33,590 BAD EPOCHS (no improvement): 4
455
- 2023-01-08 11:33:33,592 ----------------------------------------------------------------------------------------------------
456
- 2023-01-08 11:35:46,606 epoch 9 - iter 652/6529 - loss 0.15568375 - samples/sec: 19.62 - lr: 0.000004
457
- 2023-01-08 11:38:00,623 epoch 9 - iter 1304/6529 - loss 0.15500402 - samples/sec: 19.47 - lr: 0.000004
458
- 2023-01-08 11:40:11,536 epoch 9 - iter 1956/6529 - loss 0.15346711 - samples/sec: 19.93 - lr: 0.000004
459
- 2023-01-08 11:42:31,019 epoch 9 - iter 2608/6529 - loss 0.15530038 - samples/sec: 18.71 - lr: 0.000004
460
- 2023-01-08 11:44:46,689 epoch 9 - iter 3260/6529 - loss 0.15662159 - samples/sec: 19.23 - lr: 0.000004
461
- 2023-01-08 11:47:04,958 epoch 9 - iter 3912/6529 - loss 0.15851655 - samples/sec: 18.87 - lr: 0.000004
462
- 2023-01-08 11:49:25,939 epoch 9 - iter 4564/6529 - loss 0.15831685 - samples/sec: 18.51 - lr: 0.000004
463
- 2023-01-08 11:51:41,077 epoch 9 - iter 5216/6529 - loss 0.15778522 - samples/sec: 19.31 - lr: 0.000004
464
- 2023-01-08 11:54:00,178 epoch 9 - iter 5868/6529 - loss 0.15675165 - samples/sec: 18.76 - lr: 0.000004
465
- 2023-01-08 11:56:18,653 epoch 9 - iter 6520/6529 - loss 0.15587139 - samples/sec: 18.84 - lr: 0.000004
466
- 2023-01-08 11:56:20,505 ----------------------------------------------------------------------------------------------------
467
- 2023-01-08 11:56:20,506 EPOCH 9 done: loss 0.1559 - lr 0.0000036
468
- 2023-01-08 11:57:35,001 DEV : loss 0.11621606349945068 - f1-score (micro avg) 0.8955
469
- 2023-01-08 11:57:35,052 BAD EPOCHS (no improvement): 4
470
- 2023-01-08 11:57:35,054 ----------------------------------------------------------------------------------------------------
471
- 2023-01-08 11:59:50,825 epoch 10 - iter 652/6529 - loss 0.14409633 - samples/sec: 19.22 - lr: 0.000004
472
- 2023-01-08 12:02:06,533 epoch 10 - iter 1304/6529 - loss 0.14631135 - samples/sec: 19.23 - lr: 0.000004
473
- 2023-01-08 12:04:21,322 epoch 10 - iter 1956/6529 - loss 0.14735676 - samples/sec: 19.36 - lr: 0.000003
474
- 2023-01-08 12:06:35,422 epoch 10 - iter 2608/6529 - loss 0.14904395 - samples/sec: 19.46 - lr: 0.000003
475
- 2023-01-08 12:08:53,778 epoch 10 - iter 3260/6529 - loss 0.15018463 - samples/sec: 18.86 - lr: 0.000003
476
- 2023-01-08 12:11:11,643 epoch 10 - iter 3912/6529 - loss 0.15132750 - samples/sec: 18.93 - lr: 0.000003
477
- 2023-01-08 12:13:29,660 epoch 10 - iter 4564/6529 - loss 0.15188127 - samples/sec: 18.90 - lr: 0.000003
478
- 2023-01-08 12:15:44,264 epoch 10 - iter 5216/6529 - loss 0.15133341 - samples/sec: 19.38 - lr: 0.000003
479
- 2023-01-08 12:18:01,119 epoch 10 - iter 5868/6529 - loss 0.15156043 - samples/sec: 19.06 - lr: 0.000003
480
- 2023-01-08 12:20:16,350 epoch 10 - iter 6520/6529 - loss 0.15045767 - samples/sec: 19.29 - lr: 0.000003
481
- 2023-01-08 12:20:18,235 ----------------------------------------------------------------------------------------------------
482
- 2023-01-08 12:20:18,237 EPOCH 10 done: loss 0.1505 - lr 0.0000033
483
- 2023-01-08 12:21:35,768 DEV : loss 0.11673574149608612 - f1-score (micro avg) 0.8996
484
- 2023-01-08 12:21:35,818 BAD EPOCHS (no improvement): 4
485
- 2023-01-08 12:21:35,820 ----------------------------------------------------------------------------------------------------
486
- 2023-01-08 12:23:55,505 epoch 11 - iter 652/6529 - loss 0.14428276 - samples/sec: 18.68 - lr: 0.000003
487
- 2023-01-08 12:26:10,978 epoch 11 - iter 1304/6529 - loss 0.14390834 - samples/sec: 19.26 - lr: 0.000003
488
- 2023-01-08 12:28:26,666 epoch 11 - iter 1956/6529 - loss 0.14472155 - samples/sec: 19.23 - lr: 0.000003
489
- 2023-01-08 12:30:41,813 epoch 11 - iter 2608/6529 - loss 0.14514745 - samples/sec: 19.31 - lr: 0.000003
490
- 2023-01-08 12:32:57,301 epoch 11 - iter 3260/6529 - loss 0.14604008 - samples/sec: 19.26 - lr: 0.000003
491
- 2023-01-08 12:35:12,776 epoch 11 - iter 3912/6529 - loss 0.14811782 - samples/sec: 19.26 - lr: 0.000003
492
- 2023-01-08 12:37:29,540 epoch 11 - iter 4564/6529 - loss 0.14833497 - samples/sec: 19.08 - lr: 0.000003
493
- 2023-01-08 12:39:44,148 epoch 11 - iter 5216/6529 - loss 0.14770587 - samples/sec: 19.38 - lr: 0.000003
494
- 2023-01-08 12:41:57,137 epoch 11 - iter 5868/6529 - loss 0.14687045 - samples/sec: 19.62 - lr: 0.000003
495
- 2023-01-08 12:44:12,270 epoch 11 - iter 6520/6529 - loss 0.14637472 - samples/sec: 19.31 - lr: 0.000003
496
- 2023-01-08 12:44:14,156 ----------------------------------------------------------------------------------------------------
497
- 2023-01-08 12:44:14,161 EPOCH 11 done: loss 0.1464 - lr 0.0000031
498
- 2023-01-08 12:45:32,296 DEV : loss 0.13083890080451965 - f1-score (micro avg) 0.9023
499
- 2023-01-08 12:45:32,345 BAD EPOCHS (no improvement): 4
500
- 2023-01-08 12:45:32,346 ----------------------------------------------------------------------------------------------------
501
- 2023-01-08 12:47:48,927 epoch 12 - iter 652/6529 - loss 0.13782378 - samples/sec: 19.10 - lr: 0.000003
502
- 2023-01-08 12:50:06,437 epoch 12 - iter 1304/6529 - loss 0.13900710 - samples/sec: 18.97 - lr: 0.000003
503
- 2023-01-08 12:52:19,502 epoch 12 - iter 1956/6529 - loss 0.13938580 - samples/sec: 19.61 - lr: 0.000003
504
- 2023-01-08 12:54:34,968 epoch 12 - iter 2608/6529 - loss 0.14020151 - samples/sec: 19.26 - lr: 0.000003
505
- 2023-01-08 12:56:52,757 epoch 12 - iter 3260/6529 - loss 0.14277796 - samples/sec: 18.94 - lr: 0.000003
506
- 2023-01-08 12:59:08,728 epoch 12 - iter 3912/6529 - loss 0.14513185 - samples/sec: 19.19 - lr: 0.000003
507
- 2023-01-08 13:01:25,805 epoch 12 - iter 4564/6529 - loss 0.14531044 - samples/sec: 19.03 - lr: 0.000003
508
- 2023-01-08 13:03:41,339 epoch 12 - iter 5216/6529 - loss 0.14480840 - samples/sec: 19.25 - lr: 0.000003
509
- 2023-01-08 13:05:56,188 epoch 12 - iter 5868/6529 - loss 0.14468314 - samples/sec: 19.35 - lr: 0.000003
510
- 2023-01-08 13:08:13,187 epoch 12 - iter 6520/6529 - loss 0.14374744 - samples/sec: 19.05 - lr: 0.000003
511
- 2023-01-08 13:08:15,004 ----------------------------------------------------------------------------------------------------
512
- 2023-01-08 13:08:15,008 EPOCH 12 done: loss 0.1438 - lr 0.0000029
513
- 2023-01-08 13:09:29,582 DEV : loss 0.13419032096862793 - f1-score (micro avg) 0.9025
514
- 2023-01-08 13:09:29,636 BAD EPOCHS (no improvement): 4
515
- 2023-01-08 13:09:29,638 ----------------------------------------------------------------------------------------------------
516
- 2023-01-08 13:11:47,888 epoch 13 - iter 652/6529 - loss 0.13602517 - samples/sec: 18.87 - lr: 0.000003
517
- 2023-01-08 13:14:03,642 epoch 13 - iter 1304/6529 - loss 0.13732952 - samples/sec: 19.22 - lr: 0.000003
518
- 2023-01-08 13:16:16,517 epoch 13 - iter 1956/6529 - loss 0.13703433 - samples/sec: 19.64 - lr: 0.000003
519
- 2023-01-08 13:18:32,679 epoch 13 - iter 2608/6529 - loss 0.13862001 - samples/sec: 19.16 - lr: 0.000003
520
- 2023-01-08 13:20:50,849 epoch 13 - iter 3260/6529 - loss 0.14052905 - samples/sec: 18.88 - lr: 0.000003
521
- 2023-01-08 13:23:08,216 epoch 13 - iter 3912/6529 - loss 0.14156690 - samples/sec: 18.99 - lr: 0.000003
522
- 2023-01-08 13:25:24,958 epoch 13 - iter 4564/6529 - loss 0.14102465 - samples/sec: 19.08 - lr: 0.000003
523
- 2023-01-08 13:27:40,418 epoch 13 - iter 5216/6529 - loss 0.14044374 - samples/sec: 19.26 - lr: 0.000003
524
- 2023-01-08 13:29:57,587 epoch 13 - iter 5868/6529 - loss 0.14039873 - samples/sec: 19.02 - lr: 0.000003
525
- 2023-01-08 13:32:16,056 epoch 13 - iter 6520/6529 - loss 0.13956668 - samples/sec: 18.84 - lr: 0.000003
526
- 2023-01-08 13:32:17,665 ----------------------------------------------------------------------------------------------------
527
- 2023-01-08 13:32:17,668 EPOCH 13 done: loss 0.1396 - lr 0.0000027
528
- 2023-01-08 13:33:31,771 DEV : loss 0.13482151925563812 - f1-score (micro avg) 0.9055
529
- 2023-01-08 13:33:31,821 BAD EPOCHS (no improvement): 4
530
- 2023-01-08 13:33:31,823 ----------------------------------------------------------------------------------------------------
531
- 2023-01-08 13:35:50,820 epoch 14 - iter 652/6529 - loss 0.13136383 - samples/sec: 18.77 - lr: 0.000003
532
- 2023-01-08 13:38:04,351 epoch 14 - iter 1304/6529 - loss 0.13382280 - samples/sec: 19.54 - lr: 0.000003
533
- 2023-01-08 13:40:18,657 epoch 14 - iter 1956/6529 - loss 0.13488302 - samples/sec: 19.43 - lr: 0.000003
534
- 2023-01-08 13:42:36,373 epoch 14 - iter 2608/6529 - loss 0.13564871 - samples/sec: 18.95 - lr: 0.000003
535
- 2023-01-08 13:44:53,302 epoch 14 - iter 3260/6529 - loss 0.13706665 - samples/sec: 19.05 - lr: 0.000003
536
- 2023-01-08 13:47:09,554 epoch 14 - iter 3912/6529 - loss 0.13866847 - samples/sec: 19.15 - lr: 0.000003
537
- 2023-01-08 13:49:25,356 epoch 14 - iter 4564/6529 - loss 0.13860764 - samples/sec: 19.21 - lr: 0.000003
538
- 2023-01-08 13:51:40,558 epoch 14 - iter 5216/6529 - loss 0.13787870 - samples/sec: 19.30 - lr: 0.000002
539
- 2023-01-08 13:53:57,761 epoch 14 - iter 5868/6529 - loss 0.13779242 - samples/sec: 19.02 - lr: 0.000002
540
- 2023-01-08 13:56:14,197 epoch 14 - iter 6520/6529 - loss 0.13672301 - samples/sec: 19.12 - lr: 0.000002
541
- 2023-01-08 13:56:15,980 ----------------------------------------------------------------------------------------------------
542
- 2023-01-08 13:56:15,983 EPOCH 14 done: loss 0.1367 - lr 0.0000024
543
- 2023-01-08 13:57:30,521 DEV : loss 0.13973088562488556 - f1-score (micro avg) 0.9037
544
- 2023-01-08 13:57:30,570 BAD EPOCHS (no improvement): 4
545
- 2023-01-08 13:57:30,572 ----------------------------------------------------------------------------------------------------
546
- 2023-01-08 13:59:46,249 epoch 15 - iter 652/6529 - loss 0.13280969 - samples/sec: 19.23 - lr: 0.000002
547
- 2023-01-08 14:01:59,714 epoch 15 - iter 1304/6529 - loss 0.13297304 - samples/sec: 19.55 - lr: 0.000002
548
- 2023-01-08 14:04:13,515 epoch 15 - iter 1956/6529 - loss 0.13331323 - samples/sec: 19.50 - lr: 0.000002
549
- 2023-01-08 14:06:30,305 epoch 15 - iter 2608/6529 - loss 0.13321780 - samples/sec: 19.07 - lr: 0.000002
550
- 2023-01-08 14:08:46,289 epoch 15 - iter 3260/6529 - loss 0.13439079 - samples/sec: 19.19 - lr: 0.000002
551
- 2023-01-08 14:11:04,469 epoch 15 - iter 3912/6529 - loss 0.13600843 - samples/sec: 18.88 - lr: 0.000002
552
- 2023-01-08 14:13:21,293 epoch 15 - iter 4564/6529 - loss 0.13579252 - samples/sec: 19.07 - lr: 0.000002
553
- 2023-01-08 14:15:33,406 epoch 15 - iter 5216/6529 - loss 0.13553200 - samples/sec: 19.75 - lr: 0.000002
554
- 2023-01-08 14:17:49,770 epoch 15 - iter 5868/6529 - loss 0.13548036 - samples/sec: 19.13 - lr: 0.000002
555
- 2023-01-08 14:20:05,553 epoch 15 - iter 6520/6529 - loss 0.13484085 - samples/sec: 19.22 - lr: 0.000002
556
- 2023-01-08 14:20:07,463 ----------------------------------------------------------------------------------------------------
557
- 2023-01-08 14:20:07,466 EPOCH 15 done: loss 0.1349 - lr 0.0000022
558
- 2023-01-08 14:21:24,464 DEV : loss 0.14579473435878754 - f1-score (micro avg) 0.9059
559
- 2023-01-08 14:21:24,516 BAD EPOCHS (no improvement): 4
560
- 2023-01-08 14:21:24,518 ----------------------------------------------------------------------------------------------------
561
- 2023-01-08 14:23:39,037 epoch 16 - iter 652/6529 - loss 0.13068872 - samples/sec: 19.40 - lr: 0.000002
562
- 2023-01-08 14:25:53,669 epoch 16 - iter 1304/6529 - loss 0.13040826 - samples/sec: 19.38 - lr: 0.000002
563
- 2023-01-08 14:28:08,991 epoch 16 - iter 1956/6529 - loss 0.13074354 - samples/sec: 19.28 - lr: 0.000002
564
- 2023-01-08 14:30:23,871 epoch 16 - iter 2608/6529 - loss 0.13159639 - samples/sec: 19.34 - lr: 0.000002
565
- 2023-01-08 14:32:41,690 epoch 16 - iter 3260/6529 - loss 0.13299574 - samples/sec: 18.93 - lr: 0.000002
566
- 2023-01-08 14:34:59,097 epoch 16 - iter 3912/6529 - loss 0.13394349 - samples/sec: 18.99 - lr: 0.000002
567
- 2023-01-08 14:37:15,659 epoch 16 - iter 4564/6529 - loss 0.13395312 - samples/sec: 19.11 - lr: 0.000002
568
- 2023-01-08 14:39:32,146 epoch 16 - iter 5216/6529 - loss 0.13371950 - samples/sec: 19.12 - lr: 0.000002
569
- 2023-01-08 14:41:51,422 epoch 16 - iter 5868/6529 - loss 0.13359614 - samples/sec: 18.73 - lr: 0.000002
570
- 2023-01-08 14:44:09,052 epoch 16 - iter 6520/6529 - loss 0.13321435 - samples/sec: 18.96 - lr: 0.000002
571
- 2023-01-08 14:44:10,865 ----------------------------------------------------------------------------------------------------
572
- 2023-01-08 14:44:10,869 EPOCH 16 done: loss 0.1332 - lr 0.0000020
573
- 2023-01-08 14:45:30,000 DEV : loss 0.14927005767822266 - f1-score (micro avg) 0.9049
574
- 2023-01-08 14:45:30,051 BAD EPOCHS (no improvement): 4
575
- 2023-01-08 14:45:30,053 ----------------------------------------------------------------------------------------------------
576
- 2023-01-08 14:47:46,130 epoch 17 - iter 652/6529 - loss 0.12683164 - samples/sec: 19.17 - lr: 0.000002
577
- 2023-01-08 14:50:02,615 epoch 17 - iter 1304/6529 - loss 0.12923492 - samples/sec: 19.12 - lr: 0.000002
578
- 2023-01-08 14:52:17,903 epoch 17 - iter 1956/6529 - loss 0.12797654 - samples/sec: 19.29 - lr: 0.000002
579
- 2023-01-08 14:54:35,065 epoch 17 - iter 2608/6529 - loss 0.12929489 - samples/sec: 19.02 - lr: 0.000002
580
- 2023-01-08 14:56:56,125 epoch 17 - iter 3260/6529 - loss 0.12964457 - samples/sec: 18.50 - lr: 0.000002
581
- 2023-01-08 14:59:15,274 epoch 17 - iter 3912/6529 - loss 0.13117108 - samples/sec: 18.75 - lr: 0.000002
582
- 2023-01-08 15:01:32,817 epoch 17 - iter 4564/6529 - loss 0.13181821 - samples/sec: 18.97 - lr: 0.000002
583
- 2023-01-08 15:03:48,960 epoch 17 - iter 5216/6529 - loss 0.13160885 - samples/sec: 19.16 - lr: 0.000002
584
- 2023-01-08 15:06:06,361 epoch 17 - iter 5868/6529 - loss 0.13181237 - samples/sec: 18.99 - lr: 0.000002
585
- 2023-01-08 15:08:27,233 epoch 17 - iter 6520/6529 - loss 0.13154532 - samples/sec: 18.52 - lr: 0.000002
586
- 2023-01-08 15:08:29,140 ----------------------------------------------------------------------------------------------------
587
- 2023-01-08 15:08:29,143 EPOCH 17 done: loss 0.1315 - lr 0.0000018
588
- 2023-01-08 15:09:47,453 DEV : loss 0.15806013345718384 - f1-score (micro avg) 0.9069
589
- 2023-01-08 15:09:47,506 BAD EPOCHS (no improvement): 4
590
- 2023-01-08 15:09:47,508 ----------------------------------------------------------------------------------------------------
591
- 2023-01-08 15:12:04,468 epoch 18 - iter 652/6529 - loss 0.12643756 - samples/sec: 19.05 - lr: 0.000002
592
- 2023-01-08 15:14:20,015 epoch 18 - iter 1304/6529 - loss 0.12885445 - samples/sec: 19.25 - lr: 0.000002
593
- 2023-01-08 15:16:33,243 epoch 18 - iter 1956/6529 - loss 0.12848283 - samples/sec: 19.58 - lr: 0.000002
594
- 2023-01-08 15:18:51,096 epoch 18 - iter 2608/6529 - loss 0.12838968 - samples/sec: 18.93 - lr: 0.000002
595
- 2023-01-08 15:21:10,087 epoch 18 - iter 3260/6529 - loss 0.12981736 - samples/sec: 18.77 - lr: 0.000002
596
- 2023-01-08 15:23:29,111 epoch 18 - iter 3912/6529 - loss 0.13057931 - samples/sec: 18.77 - lr: 0.000002
597
- 2023-01-08 15:25:43,545 epoch 18 - iter 4564/6529 - loss 0.13001931 - samples/sec: 19.41 - lr: 0.000002
598
- 2023-01-08 15:27:54,953 epoch 18 - iter 5216/6529 - loss 0.12977986 - samples/sec: 19.86 - lr: 0.000002
599
- 2023-01-08 15:30:14,214 epoch 18 - iter 5868/6529 - loss 0.12975537 - samples/sec: 18.74 - lr: 0.000002
600
- 2023-01-08 15:32:36,219 epoch 18 - iter 6520/6529 - loss 0.12954521 - samples/sec: 18.37 - lr: 0.000002
601
- 2023-01-08 15:32:38,167 ----------------------------------------------------------------------------------------------------
602
- 2023-01-08 15:32:38,170 EPOCH 18 done: loss 0.1296 - lr 0.0000016
603
- 2023-01-08 15:33:54,240 DEV : loss 0.15394894778728485 - f1-score (micro avg) 0.9052
604
- 2023-01-08 15:33:54,298 BAD EPOCHS (no improvement): 4
605
- 2023-01-08 15:33:54,299 ----------------------------------------------------------------------------------------------------
606
- 2023-01-08 15:36:11,665 epoch 19 - iter 652/6529 - loss 0.12689102 - samples/sec: 18.99 - lr: 0.000002
607
- 2023-01-08 15:38:29,261 epoch 19 - iter 1304/6529 - loss 0.12725324 - samples/sec: 18.96 - lr: 0.000002
608
- 2023-01-08 15:40:44,876 epoch 19 - iter 1956/6529 - loss 0.12817227 - samples/sec: 19.24 - lr: 0.000001
609
- 2023-01-08 15:43:06,566 epoch 19 - iter 2608/6529 - loss 0.12814054 - samples/sec: 18.41 - lr: 0.000001
610
- 2023-01-08 15:45:24,791 epoch 19 - iter 3260/6529 - loss 0.12827850 - samples/sec: 18.88 - lr: 0.000001
611
- 2023-01-08 15:47:45,814 epoch 19 - iter 3912/6529 - loss 0.12965719 - samples/sec: 18.50 - lr: 0.000001
612
- 2023-01-08 15:50:07,518 epoch 19 - iter 4564/6529 - loss 0.12988621 - samples/sec: 18.41 - lr: 0.000001
613
- 2023-01-08 15:52:23,448 epoch 19 - iter 5216/6529 - loss 0.12933048 - samples/sec: 19.19 - lr: 0.000001
614
- 2023-01-08 15:54:39,323 epoch 19 - iter 5868/6529 - loss 0.12917830 - samples/sec: 19.20 - lr: 0.000001
615
- 2023-01-08 15:56:59,965 epoch 19 - iter 6520/6529 - loss 0.12867037 - samples/sec: 18.55 - lr: 0.000001
616
- 2023-01-08 15:57:01,962 ----------------------------------------------------------------------------------------------------
617
- 2023-01-08 15:57:01,966 EPOCH 19 done: loss 0.1287 - lr 0.0000013
618
- 2023-01-08 15:58:18,604 DEV : loss 0.16147495806217194 - f1-score (micro avg) 0.9081
619
- 2023-01-08 15:58:18,654 BAD EPOCHS (no improvement): 4
620
- 2023-01-08 15:58:18,655 ----------------------------------------------------------------------------------------------------
621
- 2023-01-08 16:00:36,476 epoch 20 - iter 652/6529 - loss 0.12667596 - samples/sec: 18.93 - lr: 0.000001
622
- 2023-01-08 16:02:52,155 epoch 20 - iter 1304/6529 - loss 0.12812912 - samples/sec: 19.23 - lr: 0.000001
623
- 2023-01-08 16:05:08,323 epoch 20 - iter 1956/6529 - loss 0.12789917 - samples/sec: 19.16 - lr: 0.000001
624
- 2023-01-08 16:07:30,606 epoch 20 - iter 2608/6529 - loss 0.12736327 - samples/sec: 18.34 - lr: 0.000001
625
- 2023-01-08 16:09:54,711 epoch 20 - iter 3260/6529 - loss 0.12770827 - samples/sec: 18.11 - lr: 0.000001
626
- 2023-01-08 16:12:15,676 epoch 20 - iter 3912/6529 - loss 0.12872290 - samples/sec: 18.51 - lr: 0.000001
627
- 2023-01-08 16:14:33,832 epoch 20 - iter 4564/6529 - loss 0.12840905 - samples/sec: 18.89 - lr: 0.000001
628
- 2023-01-08 16:16:47,092 epoch 20 - iter 5216/6529 - loss 0.12787078 - samples/sec: 19.58 - lr: 0.000001
629
- 2023-01-08 16:19:11,028 epoch 20 - iter 5868/6529 - loss 0.12740936 - samples/sec: 18.13 - lr: 0.000001
630
- 2023-01-08 16:21:31,583 epoch 20 - iter 6520/6529 - loss 0.12713130 - samples/sec: 18.56 - lr: 0.000001
631
- 2023-01-08 16:21:33,402 ----------------------------------------------------------------------------------------------------
632
- 2023-01-08 16:21:33,405 EPOCH 20 done: loss 0.1271 - lr 0.0000011
633
- 2023-01-08 16:22:49,932 DEV : loss 0.159995898604393 - f1-score (micro avg) 0.9074
634
- 2023-01-08 16:22:49,984 BAD EPOCHS (no improvement): 4
635
- 2023-01-08 16:22:49,987 ----------------------------------------------------------------------------------------------------
636
- 2023-01-08 16:25:14,175 epoch 21 - iter 652/6529 - loss 0.12640983 - samples/sec: 18.10 - lr: 0.000001
637
- 2023-01-08 16:27:31,741 epoch 21 - iter 1304/6529 - loss 0.12615217 - samples/sec: 18.97 - lr: 0.000001
638
- 2023-01-08 16:29:44,470 epoch 21 - iter 1956/6529 - loss 0.12594671 - samples/sec: 19.66 - lr: 0.000001
639
- 2023-01-08 16:32:00,986 epoch 21 - iter 2608/6529 - loss 0.12622617 - samples/sec: 19.11 - lr: 0.000001
640
- 2023-01-08 16:34:16,973 epoch 21 - iter 3260/6529 - loss 0.12678245 - samples/sec: 19.19 - lr: 0.000001
641
- 2023-01-08 16:36:36,020 epoch 21 - iter 3912/6529 - loss 0.12658296 - samples/sec: 18.76 - lr: 0.000001
642
- 2023-01-08 16:38:57,014 epoch 21 - iter 4564/6529 - loss 0.12632625 - samples/sec: 18.51 - lr: 0.000001
643
- 2023-01-08 16:41:11,449 epoch 21 - iter 5216/6529 - loss 0.12593025 - samples/sec: 19.41 - lr: 0.000001
644
- 2023-01-08 16:43:32,645 epoch 21 - iter 5868/6529 - loss 0.12583151 - samples/sec: 18.48 - lr: 0.000001
645
- 2023-01-08 16:45:56,623 epoch 21 - iter 6520/6529 - loss 0.12527594 - samples/sec: 18.12 - lr: 0.000001
646
- 2023-01-08 16:45:58,452 ----------------------------------------------------------------------------------------------------
647
- 2023-01-08 16:45:58,455 EPOCH 21 done: loss 0.1253 - lr 0.0000009
648
- 2023-01-08 16:47:16,916 DEV : loss 0.16192322969436646 - f1-score (micro avg) 0.9084
649
- 2023-01-08 16:47:16,966 BAD EPOCHS (no improvement): 4
650
- 2023-01-08 16:47:16,968 ----------------------------------------------------------------------------------------------------
651
- 2023-01-08 16:49:36,861 epoch 22 - iter 652/6529 - loss 0.12905441 - samples/sec: 18.65 - lr: 0.000001
652
- 2023-01-08 16:51:51,836 epoch 22 - iter 1304/6529 - loss 0.12623396 - samples/sec: 19.33 - lr: 0.000001
653
- 2023-01-08 16:54:09,617 epoch 22 - iter 1956/6529 - loss 0.12598739 - samples/sec: 18.94 - lr: 0.000001
654
- 2023-01-08 16:56:26,735 epoch 22 - iter 2608/6529 - loss 0.12629697 - samples/sec: 19.03 - lr: 0.000001
655
- 2023-01-08 16:58:48,363 epoch 22 - iter 3260/6529 - loss 0.12612927 - samples/sec: 18.42 - lr: 0.000001
656
- 2023-01-08 17:01:07,899 epoch 22 - iter 3912/6529 - loss 0.12713583 - samples/sec: 18.70 - lr: 0.000001
657
- 2023-01-08 17:03:25,452 epoch 22 - iter 4564/6529 - loss 0.12733269 - samples/sec: 18.97 - lr: 0.000001
658
- 2023-01-08 17:05:38,176 epoch 22 - iter 5216/6529 - loss 0.12691323 - samples/sec: 19.66 - lr: 0.000001
659
- 2023-01-08 17:07:56,590 epoch 22 - iter 5868/6529 - loss 0.12649450 - samples/sec: 18.85 - lr: 0.000001
660
- 2023-01-08 17:10:19,008 epoch 22 - iter 6520/6529 - loss 0.12612071 - samples/sec: 18.32 - lr: 0.000001
661
- 2023-01-08 17:10:20,935 ----------------------------------------------------------------------------------------------------
662
- 2023-01-08 17:10:20,938 EPOCH 22 done: loss 0.1261 - lr 0.0000007
663
- 2023-01-08 17:11:39,673 DEV : loss 0.160598024725914 - f1-score (micro avg) 0.9095
664
- 2023-01-08 17:11:39,726 BAD EPOCHS (no improvement): 4
665
- 2023-01-08 17:11:39,727 ----------------------------------------------------------------------------------------------------
666
- 2023-01-08 17:13:59,008 epoch 23 - iter 652/6529 - loss 0.12423740 - samples/sec: 18.73 - lr: 0.000001
667
- 2023-01-08 17:16:15,015 epoch 23 - iter 1304/6529 - loss 0.12636817 - samples/sec: 19.18 - lr: 0.000001
668
- 2023-01-08 17:18:30,677 epoch 23 - iter 1956/6529 - loss 0.12724886 - samples/sec: 19.23 - lr: 0.000001
669
- 2023-01-08 17:20:43,439 epoch 23 - iter 2608/6529 - loss 0.12629184 - samples/sec: 19.65 - lr: 0.000001
670
- 2023-01-08 17:23:01,405 epoch 23 - iter 3260/6529 - loss 0.12601784 - samples/sec: 18.91 - lr: 0.000001
671
- 2023-01-08 17:25:13,272 epoch 23 - iter 3912/6529 - loss 0.12569715 - samples/sec: 19.79 - lr: 0.000001
672
- 2023-01-08 17:27:32,247 epoch 23 - iter 4564/6529 - loss 0.12589309 - samples/sec: 18.77 - lr: 0.000001
673
- 2023-01-08 17:29:49,487 epoch 23 - iter 5216/6529 - loss 0.12574754 - samples/sec: 19.01 - lr: 0.000000
674
- 2023-01-08 17:32:05,380 epoch 23 - iter 5868/6529 - loss 0.12524206 - samples/sec: 19.20 - lr: 0.000000
675
- 2023-01-08 17:34:23,467 epoch 23 - iter 6520/6529 - loss 0.12483199 - samples/sec: 18.89 - lr: 0.000000
676
- 2023-01-08 17:34:25,411 ----------------------------------------------------------------------------------------------------
677
- 2023-01-08 17:34:25,414 EPOCH 23 done: loss 0.1248 - lr 0.0000004
678
- 2023-01-08 17:35:39,808 DEV : loss 0.161932572722435 - f1-score (micro avg) 0.9107
679
- 2023-01-08 17:35:39,860 BAD EPOCHS (no improvement): 4
680
- 2023-01-08 17:35:39,861 ----------------------------------------------------------------------------------------------------
681
- 2023-01-08 17:38:02,048 epoch 24 - iter 652/6529 - loss 0.12624568 - samples/sec: 18.35 - lr: 0.000000
682
- 2023-01-08 17:40:21,980 epoch 24 - iter 1304/6529 - loss 0.12467521 - samples/sec: 18.65 - lr: 0.000000
683
- 2023-01-08 17:42:40,219 epoch 24 - iter 1956/6529 - loss 0.12463381 - samples/sec: 18.87 - lr: 0.000000
684
- 2023-01-08 17:45:00,289 epoch 24 - iter 2608/6529 - loss 0.12397927 - samples/sec: 18.63 - lr: 0.000000
685
- 2023-01-08 17:47:21,805 epoch 24 - iter 3260/6529 - loss 0.12523877 - samples/sec: 18.44 - lr: 0.000000
686
- 2023-01-08 17:49:40,558 epoch 24 - iter 3912/6529 - loss 0.12577788 - samples/sec: 18.80 - lr: 0.000000
687
- 2023-01-08 17:51:56,485 epoch 24 - iter 4564/6529 - loss 0.12575965 - samples/sec: 19.20 - lr: 0.000000
688
- 2023-01-08 17:54:10,689 epoch 24 - iter 5216/6529 - loss 0.12505960 - samples/sec: 19.44 - lr: 0.000000
689
- 2023-01-08 17:56:28,819 epoch 24 - iter 5868/6529 - loss 0.12454837 - samples/sec: 18.89 - lr: 0.000000
690
- 2023-01-08 17:58:56,244 epoch 24 - iter 6520/6529 - loss 0.12429321 - samples/sec: 17.70 - lr: 0.000000
691
- 2023-01-08 17:58:58,354 ----------------------------------------------------------------------------------------------------
692
- 2023-01-08 17:58:58,357 EPOCH 24 done: loss 0.1243 - lr 0.0000002
693
- 2023-01-08 18:00:13,633 DEV : loss 0.16107264161109924 - f1-score (micro avg) 0.9111
694
- 2023-01-08 18:00:13,688 BAD EPOCHS (no improvement): 4
695
  2023-01-08 18:00:13,690 ----------------------------------------------------------------------------------------------------
696
  2023-01-08 18:02:30,863 epoch 25 - iter 652/6529 - loss 0.12185023 - samples/sec: 19.02 - lr: 0.000000
697
  2023-01-08 18:04:48,105 epoch 25 - iter 1304/6529 - loss 0.12151675 - samples/sec: 19.01 - lr: 0.000000
 
9
  (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
10
  (dropout): Dropout(p=0.1, inplace=False)
11
  )
12
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  (pooler): BertPooler(
14
  (dense): Linear(in_features=768, out_features=768, bias=True)
15
  (activation): Tanh()
16
  )
 
17
  )
18
  (word_dropout): WordDropout(p=0.05)
19
  (locked_dropout): LockedDropout(p=0.5)
 
35
  2023-01-08 08:23:21,514 - train_with_dev: "False"
36
  2023-01-08 08:23:21,515 - batch_growth_annealing: "False"
37
  2023-01-08 08:23:21,516 ----------------------------------------------------------------------------------------------------
38
+ 2023-01-08 08:23:21,517 Model training base path: "resources/taggers/NSURL-2019_25epochs"
39
  2023-01-08 08:23:21,518 ----------------------------------------------------------------------------------------------------
40
  2023-01-08 08:23:21,519 Device: cuda:0
41
  2023-01-08 08:23:21,519 ----------------------------------------------------------------------------------------------------
42
  2023-01-08 08:23:21,520 Embeddings storage mode: none
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  2023-01-08 18:00:13,690 ----------------------------------------------------------------------------------------------------
44
  2023-01-08 18:02:30,863 epoch 25 - iter 652/6529 - loss 0.12185023 - samples/sec: 19.02 - lr: 0.000000
45
  2023-01-08 18:04:48,105 epoch 25 - iter 1304/6529 - loss 0.12151675 - samples/sec: 19.01 - lr: 0.000000