Add de2en and en2de models
Browse files- models/de2en/character_end2end_embeddings_with_attention/log.txt +154 -0
- models/de2en/character_end2end_embeddings_with_attention/model.pt +3 -0
- models/de2en/character_end2end_embeddings_without_attention/log.txt +218 -0
- models/de2en/character_end2end_embeddings_without_attention/model.pt +3 -0
- models/de2en/word_end2end_embeddings_with_attention/log.txt +208 -0
- models/de2en/word_end2end_embeddings_with_attention/model.pt +3 -0
- models/de2en/word_end2end_embeddings_without_attention/log.txt +218 -0
- models/de2en/word_end2end_embeddings_without_attention/model.pt +3 -0
- models/de2en/word_word2vec_embeddings_with_attention/log.txt +225 -0
- models/de2en/word_word2vec_embeddings_with_attention/model.pt +3 -0
- models/de2en/word_word2vec_embeddings_without_attention/log.txt +219 -0
- models/de2en/word_word2vec_embeddings_without_attention/model.pt +3 -0
- models/en2de/character_end2end_embeddings_with_attention/log.txt +154 -0
- models/en2de/character_end2end_embeddings_with_attention/model.pt +3 -0
- models/en2de/character_end2end_embeddings_without_attention/log.txt +147 -0
- models/en2de/character_end2end_embeddings_without_attention/model.pt +3 -0
- models/en2de/word_end2end_embeddings_with_attention/log.txt +225 -0
- models/en2de/word_end2end_embeddings_with_attention/model.pt +3 -0
- models/en2de/word_end2end_embeddings_without_attention/log.txt +219 -0
- models/en2de/word_end2end_embeddings_without_attention/model.pt +3 -0
- models/en2de/word_word2vec_embeddings_with_attention/log.txt +226 -0
- models/en2de/word_word2vec_embeddings_with_attention/model.pt +3 -0
- models/en2de/word_word2vec_embeddings_without_attention/log.txt +0 -0
- models/en2de/word_word2vec_embeddings_without_attention/model.pt +3 -0
models/de2en/character_end2end_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 04:42:24,085 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 04:42:24,085 Training Model
|
3 |
+
2024-07-29 04:42:24,085 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 04:42:24,085 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(112, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(114, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=114, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-29 04:42:24,085 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-29 04:42:24,085 Training Hyperparameters:
|
27 |
+
2024-07-29 04:42:24,086 - max_epochs: 10
|
28 |
+
2024-07-29 04:42:24,086 - learning_rate: 0.001
|
29 |
+
2024-07-29 04:42:24,086 - batch_size: 128
|
30 |
+
2024-07-29 04:42:24,086 - patience: 5
|
31 |
+
2024-07-29 04:42:24,086 - scheduler_patience: 3
|
32 |
+
2024-07-29 04:42:24,086 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-29 04:42:24,086 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-29 04:42:24,086 Computational Parameters:
|
35 |
+
2024-07-29 04:42:24,086 - num_workers: 4
|
36 |
+
2024-07-29 04:42:24,086 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-29 04:42:24,086 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-29 04:42:24,086 Dataset Splits:
|
39 |
+
2024-07-29 04:42:24,086 - train: 133623 data points
|
40 |
+
2024-07-29 04:42:24,086 - dev: 19090 data points
|
41 |
+
2024-07-29 04:42:24,086 - test: 38179 data points
|
42 |
+
2024-07-29 04:42:24,086 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-29 04:42:24,086 EPOCH 1
|
44 |
+
2024-07-29 04:45:28,740 batch 104/1044 - loss 2.83774002 - lr 0.0010 - time 184.65s
|
45 |
+
2024-07-29 04:48:40,216 batch 208/1044 - loss 2.69157360 - lr 0.0010 - time 376.13s
|
46 |
+
2024-07-29 04:51:43,099 batch 312/1044 - loss 2.61162177 - lr 0.0010 - time 559.01s
|
47 |
+
2024-07-29 04:54:47,347 batch 416/1044 - loss 2.55821791 - lr 0.0010 - time 743.26s
|
48 |
+
2024-07-29 04:57:41,820 batch 520/1044 - loss 2.51365572 - lr 0.0010 - time 917.73s
|
49 |
+
2024-07-29 05:00:56,513 batch 624/1044 - loss 2.47733882 - lr 0.0010 - time 1112.43s
|
50 |
+
2024-07-29 05:03:37,940 batch 728/1044 - loss 2.44389959 - lr 0.0010 - time 1273.85s
|
51 |
+
2024-07-29 05:06:24,919 batch 832/1044 - loss 2.40961261 - lr 0.0010 - time 1440.83s
|
52 |
+
2024-07-29 05:09:17,364 batch 936/1044 - loss 2.37903219 - lr 0.0010 - time 1613.28s
|
53 |
+
2024-07-29 05:12:21,697 batch 1040/1044 - loss 2.34951652 - lr 0.0010 - time 1797.61s
|
54 |
+
2024-07-29 05:12:26,875 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-29 05:12:26,878 EPOCH 1 DONE
|
56 |
+
2024-07-29 05:13:13,524 TRAIN Loss: 2.3485
|
57 |
+
2024-07-29 05:13:13,524 DEV Loss: 3.6517
|
58 |
+
2024-07-29 05:13:13,524 DEV Perplexity: 38.5406
|
59 |
+
2024-07-29 05:13:13,524 New best score!
|
60 |
+
2024-07-29 05:13:13,525 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-29 05:13:13,525 EPOCH 2
|
62 |
+
2024-07-29 05:16:13,790 batch 104/1044 - loss 2.03566457 - lr 0.0010 - time 180.26s
|
63 |
+
2024-07-29 05:19:18,503 batch 208/1044 - loss 2.01975197 - lr 0.0010 - time 364.98s
|
64 |
+
2024-07-29 05:22:27,296 batch 312/1044 - loss 2.01034658 - lr 0.0010 - time 553.77s
|
65 |
+
2024-07-29 05:25:25,285 batch 416/1044 - loss 1.99740521 - lr 0.0010 - time 731.76s
|
66 |
+
2024-07-29 05:28:20,014 batch 520/1044 - loss 1.98132378 - lr 0.0010 - time 906.49s
|
67 |
+
2024-07-29 05:31:18,268 batch 624/1044 - loss 1.96808053 - lr 0.0010 - time 1084.74s
|
68 |
+
2024-07-29 05:34:09,289 batch 728/1044 - loss 1.95846857 - lr 0.0010 - time 1255.76s
|
69 |
+
2024-07-29 05:37:18,613 batch 832/1044 - loss 1.94876255 - lr 0.0010 - time 1445.09s
|
70 |
+
2024-07-29 05:40:08,339 batch 936/1044 - loss 1.93996137 - lr 0.0010 - time 1614.81s
|
71 |
+
2024-07-29 05:43:14,442 batch 1040/1044 - loss 1.93165519 - lr 0.0010 - time 1800.92s
|
72 |
+
2024-07-29 05:43:22,242 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-29 05:43:22,245 EPOCH 2 DONE
|
74 |
+
2024-07-29 05:44:08,478 TRAIN Loss: 1.9311
|
75 |
+
2024-07-29 05:44:08,478 DEV Loss: 3.8520
|
76 |
+
2024-07-29 05:44:08,478 DEV Perplexity: 47.0890
|
77 |
+
2024-07-29 05:44:08,478 No improvement for 1 epoch(s)
|
78 |
+
2024-07-29 05:44:08,478 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-29 05:44:08,478 EPOCH 3
|
80 |
+
2024-07-29 05:47:08,374 batch 104/1044 - loss 1.82115972 - lr 0.0010 - time 179.90s
|
81 |
+
2024-07-29 05:50:01,154 batch 208/1044 - loss 1.82096945 - lr 0.0010 - time 352.68s
|
82 |
+
2024-07-29 05:52:50,432 batch 312/1044 - loss 1.81545555 - lr 0.0010 - time 521.95s
|
83 |
+
2024-07-29 05:56:01,839 batch 416/1044 - loss 1.80929346 - lr 0.0010 - time 713.36s
|
84 |
+
2024-07-29 05:59:06,754 batch 520/1044 - loss 1.80582210 - lr 0.0010 - time 898.28s
|
85 |
+
2024-07-29 06:02:08,393 batch 624/1044 - loss 1.79901005 - lr 0.0010 - time 1079.91s
|
86 |
+
2024-07-29 06:05:20,722 batch 728/1044 - loss 1.79334588 - lr 0.0010 - time 1272.24s
|
87 |
+
2024-07-29 06:08:05,926 batch 832/1044 - loss 1.78740208 - lr 0.0010 - time 1437.45s
|
88 |
+
2024-07-29 06:10:59,878 batch 936/1044 - loss 1.78273205 - lr 0.0010 - time 1611.40s
|
89 |
+
2024-07-29 06:14:05,067 batch 1040/1044 - loss 1.77811699 - lr 0.0010 - time 1796.59s
|
90 |
+
2024-07-29 06:14:11,499 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-29 06:14:11,502 EPOCH 3 DONE
|
92 |
+
2024-07-29 06:14:57,787 TRAIN Loss: 1.7781
|
93 |
+
2024-07-29 06:14:57,787 DEV Loss: 3.8704
|
94 |
+
2024-07-29 06:14:57,787 DEV Perplexity: 47.9616
|
95 |
+
2024-07-29 06:14:57,787 No improvement for 2 epoch(s)
|
96 |
+
2024-07-29 06:14:57,787 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-29 06:14:57,787 EPOCH 4
|
98 |
+
2024-07-29 06:17:47,582 batch 104/1044 - loss 1.70287426 - lr 0.0010 - time 169.79s
|
99 |
+
2024-07-29 06:20:46,670 batch 208/1044 - loss 1.70862214 - lr 0.0010 - time 348.88s
|
100 |
+
2024-07-29 06:23:36,046 batch 312/1044 - loss 1.71112499 - lr 0.0010 - time 518.26s
|
101 |
+
2024-07-29 06:26:40,139 batch 416/1044 - loss 1.70913852 - lr 0.0010 - time 702.35s
|
102 |
+
2024-07-29 06:29:35,197 batch 520/1044 - loss 1.70622180 - lr 0.0010 - time 877.41s
|
103 |
+
2024-07-29 06:32:46,182 batch 624/1044 - loss 1.70036061 - lr 0.0010 - time 1068.40s
|
104 |
+
2024-07-29 06:35:40,667 batch 728/1044 - loss 1.69717816 - lr 0.0010 - time 1242.88s
|
105 |
+
2024-07-29 06:38:48,262 batch 832/1044 - loss 1.69264350 - lr 0.0010 - time 1430.47s
|
106 |
+
2024-07-29 06:41:50,576 batch 936/1044 - loss 1.68914897 - lr 0.0010 - time 1612.79s
|
107 |
+
2024-07-29 06:44:53,396 batch 1040/1044 - loss 1.68603338 - lr 0.0010 - time 1795.61s
|
108 |
+
2024-07-29 06:45:02,616 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-29 06:45:02,618 EPOCH 4 DONE
|
110 |
+
2024-07-29 06:45:48,992 TRAIN Loss: 1.6859
|
111 |
+
2024-07-29 06:45:48,992 DEV Loss: 4.0447
|
112 |
+
2024-07-29 06:45:48,992 DEV Perplexity: 57.0919
|
113 |
+
2024-07-29 06:45:48,992 No improvement for 3 epoch(s)
|
114 |
+
2024-07-29 06:45:48,992 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-29 06:45:48,992 EPOCH 5
|
116 |
+
2024-07-29 06:48:45,115 batch 104/1044 - loss 1.63299357 - lr 0.0010 - time 176.12s
|
117 |
+
2024-07-29 06:51:35,807 batch 208/1044 - loss 1.62950270 - lr 0.0010 - time 346.81s
|
118 |
+
2024-07-29 06:54:46,072 batch 312/1044 - loss 1.63286690 - lr 0.0010 - time 537.08s
|
119 |
+
2024-07-29 06:57:45,633 batch 416/1044 - loss 1.63218283 - lr 0.0010 - time 716.64s
|
120 |
+
2024-07-29 07:00:43,309 batch 520/1044 - loss 1.63061902 - lr 0.0010 - time 894.32s
|
121 |
+
2024-07-29 07:03:35,780 batch 624/1044 - loss 1.62754329 - lr 0.0010 - time 1066.79s
|
122 |
+
2024-07-29 07:06:38,813 batch 728/1044 - loss 1.62501707 - lr 0.0010 - time 1249.82s
|
123 |
+
2024-07-29 07:09:45,197 batch 832/1044 - loss 1.62138438 - lr 0.0010 - time 1436.20s
|
124 |
+
2024-07-29 07:12:45,352 batch 936/1044 - loss 1.61940890 - lr 0.0010 - time 1616.36s
|
125 |
+
2024-07-29 07:15:49,279 batch 1040/1044 - loss 1.61704726 - lr 0.0010 - time 1800.29s
|
126 |
+
2024-07-29 07:15:54,552 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-29 07:15:54,554 EPOCH 5 DONE
|
128 |
+
2024-07-29 07:16:40,924 TRAIN Loss: 1.6170
|
129 |
+
2024-07-29 07:16:40,924 DEV Loss: 4.1085
|
130 |
+
2024-07-29 07:16:40,924 DEV Perplexity: 60.8567
|
131 |
+
2024-07-29 07:16:40,924 No improvement for 4 epoch(s)
|
132 |
+
2024-07-29 07:16:40,925 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-29 07:16:40,925 EPOCH 6
|
134 |
+
2024-07-29 07:19:49,195 batch 104/1044 - loss 1.56859132 - lr 0.0001 - time 188.27s
|
135 |
+
2024-07-29 07:22:42,313 batch 208/1044 - loss 1.56012409 - lr 0.0001 - time 361.39s
|
136 |
+
2024-07-29 07:25:52,599 batch 312/1044 - loss 1.55991665 - lr 0.0001 - time 551.67s
|
137 |
+
2024-07-29 07:28:54,507 batch 416/1044 - loss 1.55604229 - lr 0.0001 - time 733.58s
|
138 |
+
2024-07-29 07:31:52,384 batch 520/1044 - loss 1.55617112 - lr 0.0001 - time 911.46s
|
139 |
+
2024-07-29 07:35:13,662 batch 624/1044 - loss 1.55762414 - lr 0.0001 - time 1112.74s
|
140 |
+
2024-07-29 07:38:13,403 batch 728/1044 - loss 1.55602573 - lr 0.0001 - time 1292.48s
|
141 |
+
2024-07-29 07:41:07,426 batch 832/1044 - loss 1.55367613 - lr 0.0001 - time 1466.50s
|
142 |
+
2024-07-29 07:43:47,779 batch 936/1044 - loss 1.55211146 - lr 0.0001 - time 1626.85s
|
143 |
+
2024-07-29 07:46:37,945 batch 1040/1044 - loss 1.55028499 - lr 0.0001 - time 1797.02s
|
144 |
+
2024-07-29 07:46:44,673 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-29 07:46:44,675 EPOCH 6 DONE
|
146 |
+
2024-07-29 07:47:30,994 TRAIN Loss: 1.5500
|
147 |
+
2024-07-29 07:47:30,994 DEV Loss: 4.2097
|
148 |
+
2024-07-29 07:47:30,994 DEV Perplexity: 67.3339
|
149 |
+
2024-07-29 07:47:30,994 No improvement for 5 epoch(s)
|
150 |
+
2024-07-29 07:47:30,994 Patience reached: Terminating model training due to early stopping
|
151 |
+
2024-07-29 07:47:30,994 ----------------------------------------------------------------------------------------------------
|
152 |
+
2024-07-29 07:47:30,994 Finished Training
|
153 |
+
2024-07-29 07:49:01,240 TEST Perplexity: 38.3937
|
154 |
+
2024-07-29 08:00:07,776 TEST BLEU = 2.84 44.3/10.4/0.5/0.3 (BP = 1.000 ratio = 1.000 hyp_len = 97 ref_len = 97)
|
models/de2en/character_end2end_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:578f5c9e3a3f89b1a26d14e559cb417c537d23c9fe50097ce683bea7a91517fd
|
3 |
+
size 15949864
|
models/de2en/character_end2end_embeddings_without_attention/log.txt
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 08:00:18,857 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 08:00:18,857 Training Model
|
3 |
+
2024-07-29 08:00:18,857 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 08:00:18,857 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(112, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(114, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 1024, batch_first=True)
|
14 |
+
(hidden2vocab): Linear(in_features=1024, out_features=114, bias=True)
|
15 |
+
(log_softmax): LogSoftmax(dim=-1)
|
16 |
+
)
|
17 |
+
)
|
18 |
+
2024-07-29 08:00:18,857 ----------------------------------------------------------------------------------------------------
|
19 |
+
2024-07-29 08:00:18,857 Training Hyperparameters:
|
20 |
+
2024-07-29 08:00:18,857 - max_epochs: 10
|
21 |
+
2024-07-29 08:00:18,857 - learning_rate: 0.001
|
22 |
+
2024-07-29 08:00:18,857 - batch_size: 128
|
23 |
+
2024-07-29 08:00:18,857 - patience: 5
|
24 |
+
2024-07-29 08:00:18,857 - scheduler_patience: 3
|
25 |
+
2024-07-29 08:00:18,857 - teacher_forcing_ratio: 0.5
|
26 |
+
2024-07-29 08:00:18,857 ----------------------------------------------------------------------------------------------------
|
27 |
+
2024-07-29 08:00:18,857 Computational Parameters:
|
28 |
+
2024-07-29 08:00:18,857 - num_workers: 4
|
29 |
+
2024-07-29 08:00:18,857 - device: device(type='cuda', index=0)
|
30 |
+
2024-07-29 08:00:18,857 ----------------------------------------------------------------------------------------------------
|
31 |
+
2024-07-29 08:00:18,857 Dataset Splits:
|
32 |
+
2024-07-29 08:00:18,857 - train: 133623 data points
|
33 |
+
2024-07-29 08:00:18,857 - dev: 19090 data points
|
34 |
+
2024-07-29 08:00:18,857 - test: 38179 data points
|
35 |
+
2024-07-29 08:00:18,857 ----------------------------------------------------------------------------------------------------
|
36 |
+
2024-07-29 08:00:18,857 EPOCH 1
|
37 |
+
2024-07-29 08:01:11,410 batch 104/1044 - loss 2.76805122 - lr 0.0010 - time 52.55s
|
38 |
+
2024-07-29 08:02:04,056 batch 208/1044 - loss 2.64841842 - lr 0.0010 - time 105.20s
|
39 |
+
2024-07-29 08:02:56,265 batch 312/1044 - loss 2.57898955 - lr 0.0010 - time 157.41s
|
40 |
+
2024-07-29 08:03:46,595 batch 416/1044 - loss 2.53192030 - lr 0.0010 - time 207.74s
|
41 |
+
2024-07-29 08:04:40,746 batch 520/1044 - loss 2.49861492 - lr 0.0010 - time 261.89s
|
42 |
+
2024-07-29 08:05:33,770 batch 624/1044 - loss 2.46931939 - lr 0.0010 - time 314.91s
|
43 |
+
2024-07-29 08:06:24,759 batch 728/1044 - loss 2.44438299 - lr 0.0010 - time 365.90s
|
44 |
+
2024-07-29 08:07:15,000 batch 832/1044 - loss 2.42292007 - lr 0.0010 - time 416.14s
|
45 |
+
2024-07-29 08:08:10,355 batch 936/1044 - loss 2.40563072 - lr 0.0010 - time 471.50s
|
46 |
+
2024-07-29 08:09:01,448 batch 1040/1044 - loss 2.38826131 - lr 0.0010 - time 522.59s
|
47 |
+
2024-07-29 08:09:03,325 ----------------------------------------------------------------------------------------------------
|
48 |
+
2024-07-29 08:09:03,326 EPOCH 1 DONE
|
49 |
+
2024-07-29 08:09:29,447 TRAIN Loss: 2.3877
|
50 |
+
2024-07-29 08:09:29,447 DEV Loss: 3.3787
|
51 |
+
2024-07-29 08:09:29,447 DEV Perplexity: 29.3338
|
52 |
+
2024-07-29 08:09:29,447 New best score!
|
53 |
+
2024-07-29 08:09:29,448 ----------------------------------------------------------------------------------------------------
|
54 |
+
2024-07-29 08:09:29,448 EPOCH 2
|
55 |
+
2024-07-29 08:10:22,147 batch 104/1044 - loss 2.22209186 - lr 0.0010 - time 52.70s
|
56 |
+
2024-07-29 08:11:14,838 batch 208/1044 - loss 2.21444971 - lr 0.0010 - time 105.39s
|
57 |
+
2024-07-29 08:12:04,854 batch 312/1044 - loss 2.20841359 - lr 0.0010 - time 155.41s
|
58 |
+
2024-07-29 08:12:56,469 batch 416/1044 - loss 2.20362168 - lr 0.0010 - time 207.02s
|
59 |
+
2024-07-29 08:13:51,217 batch 520/1044 - loss 2.19630995 - lr 0.0010 - time 261.77s
|
60 |
+
2024-07-29 08:14:44,604 batch 624/1044 - loss 2.19130721 - lr 0.0010 - time 315.16s
|
61 |
+
2024-07-29 08:15:36,532 batch 728/1044 - loss 2.19013349 - lr 0.0010 - time 367.08s
|
62 |
+
2024-07-29 08:16:29,169 batch 832/1044 - loss 2.18569613 - lr 0.0010 - time 419.72s
|
63 |
+
2024-07-29 08:17:19,487 batch 936/1044 - loss 2.18181469 - lr 0.0010 - time 470.04s
|
64 |
+
2024-07-29 08:18:11,669 batch 1040/1044 - loss 2.17839243 - lr 0.0010 - time 522.22s
|
65 |
+
2024-07-29 08:18:13,741 ----------------------------------------------------------------------------------------------------
|
66 |
+
2024-07-29 08:18:13,742 EPOCH 2 DONE
|
67 |
+
2024-07-29 08:18:39,641 TRAIN Loss: 2.1782
|
68 |
+
2024-07-29 08:18:39,641 DEV Loss: 3.3972
|
69 |
+
2024-07-29 08:18:39,641 DEV Perplexity: 29.8794
|
70 |
+
2024-07-29 08:18:39,641 No improvement for 1 epoch(s)
|
71 |
+
2024-07-29 08:18:39,641 ----------------------------------------------------------------------------------------------------
|
72 |
+
2024-07-29 08:18:39,641 EPOCH 3
|
73 |
+
2024-07-29 08:19:31,378 batch 104/1044 - loss 2.12722631 - lr 0.0010 - time 51.74s
|
74 |
+
2024-07-29 08:20:24,836 batch 208/1044 - loss 2.13235184 - lr 0.0010 - time 105.19s
|
75 |
+
2024-07-29 08:21:17,271 batch 312/1044 - loss 2.13028342 - lr 0.0010 - time 157.63s
|
76 |
+
2024-07-29 08:22:07,107 batch 416/1044 - loss 2.12492348 - lr 0.0010 - time 207.47s
|
77 |
+
2024-07-29 08:22:57,615 batch 520/1044 - loss 2.12266913 - lr 0.0010 - time 257.97s
|
78 |
+
2024-07-29 08:23:51,655 batch 624/1044 - loss 2.12023626 - lr 0.0010 - time 312.01s
|
79 |
+
2024-07-29 08:24:46,620 batch 728/1044 - loss 2.11750023 - lr 0.0010 - time 366.98s
|
80 |
+
2024-07-29 08:25:39,463 batch 832/1044 - loss 2.11486574 - lr 0.0010 - time 419.82s
|
81 |
+
2024-07-29 08:26:30,363 batch 936/1044 - loss 2.11231703 - lr 0.0010 - time 470.72s
|
82 |
+
2024-07-29 08:27:22,767 batch 1040/1044 - loss 2.10992234 - lr 0.0010 - time 523.13s
|
83 |
+
2024-07-29 08:27:24,801 ----------------------------------------------------------------------------------------------------
|
84 |
+
2024-07-29 08:27:24,803 EPOCH 3 DONE
|
85 |
+
2024-07-29 08:27:50,628 TRAIN Loss: 2.1095
|
86 |
+
2024-07-29 08:27:50,628 DEV Loss: 3.5338
|
87 |
+
2024-07-29 08:27:50,628 DEV Perplexity: 34.2544
|
88 |
+
2024-07-29 08:27:50,628 No improvement for 2 epoch(s)
|
89 |
+
2024-07-29 08:27:50,628 ----------------------------------------------------------------------------------------------------
|
90 |
+
2024-07-29 08:27:50,628 EPOCH 4
|
91 |
+
2024-07-29 08:28:44,890 batch 104/1044 - loss 2.07187447 - lr 0.0010 - time 54.26s
|
92 |
+
2024-07-29 08:29:36,382 batch 208/1044 - loss 2.07457917 - lr 0.0010 - time 105.75s
|
93 |
+
2024-07-29 08:30:27,957 batch 312/1044 - loss 2.07431510 - lr 0.0010 - time 157.33s
|
94 |
+
2024-07-29 08:31:17,958 batch 416/1044 - loss 2.07086038 - lr 0.0010 - time 207.33s
|
95 |
+
2024-07-29 08:32:08,523 batch 520/1044 - loss 2.06997228 - lr 0.0010 - time 257.89s
|
96 |
+
2024-07-29 08:33:02,282 batch 624/1044 - loss 2.06882375 - lr 0.0010 - time 311.65s
|
97 |
+
2024-07-29 08:33:53,334 batch 728/1044 - loss 2.06778251 - lr 0.0010 - time 362.71s
|
98 |
+
2024-07-29 08:34:44,960 batch 832/1044 - loss 2.06494840 - lr 0.0010 - time 414.33s
|
99 |
+
2024-07-29 08:35:33,596 batch 936/1044 - loss 2.06337325 - lr 0.0010 - time 462.97s
|
100 |
+
2024-07-29 08:36:27,117 batch 1040/1044 - loss 2.06208232 - lr 0.0010 - time 516.49s
|
101 |
+
2024-07-29 08:36:29,163 ----------------------------------------------------------------------------------------------------
|
102 |
+
2024-07-29 08:36:29,165 EPOCH 4 DONE
|
103 |
+
2024-07-29 08:36:55,198 TRAIN Loss: 2.0621
|
104 |
+
2024-07-29 08:36:55,199 DEV Loss: 3.3576
|
105 |
+
2024-07-29 08:36:55,199 DEV Perplexity: 28.7215
|
106 |
+
2024-07-29 08:36:55,199 New best score!
|
107 |
+
2024-07-29 08:36:55,200 ----------------------------------------------------------------------------------------------------
|
108 |
+
2024-07-29 08:36:55,200 EPOCH 5
|
109 |
+
2024-07-29 08:37:45,761 batch 104/1044 - loss 2.02026682 - lr 0.0010 - time 50.56s
|
110 |
+
2024-07-29 08:38:35,589 batch 208/1044 - loss 2.02837674 - lr 0.0010 - time 100.39s
|
111 |
+
2024-07-29 08:39:31,272 batch 312/1044 - loss 2.03032399 - lr 0.0010 - time 156.07s
|
112 |
+
2024-07-29 08:40:21,402 batch 416/1044 - loss 2.02753028 - lr 0.0010 - time 206.20s
|
113 |
+
2024-07-29 08:41:12,690 batch 520/1044 - loss 2.02868050 - lr 0.0010 - time 257.49s
|
114 |
+
2024-07-29 08:42:03,486 batch 624/1044 - loss 2.02747123 - lr 0.0010 - time 308.29s
|
115 |
+
2024-07-29 08:42:57,210 batch 728/1044 - loss 2.02448596 - lr 0.0010 - time 362.01s
|
116 |
+
2024-07-29 08:43:47,970 batch 832/1044 - loss 2.02472333 - lr 0.0010 - time 412.77s
|
117 |
+
2024-07-29 08:44:41,582 batch 936/1044 - loss 2.02443669 - lr 0.0010 - time 466.38s
|
118 |
+
2024-07-29 08:45:35,484 batch 1040/1044 - loss 2.02412656 - lr 0.0010 - time 520.28s
|
119 |
+
2024-07-29 08:45:37,426 ----------------------------------------------------------------------------------------------------
|
120 |
+
2024-07-29 08:45:37,428 EPOCH 5 DONE
|
121 |
+
2024-07-29 08:46:03,483 TRAIN Loss: 2.0239
|
122 |
+
2024-07-29 08:46:03,483 DEV Loss: 3.5994
|
123 |
+
2024-07-29 08:46:03,483 DEV Perplexity: 36.5764
|
124 |
+
2024-07-29 08:46:03,483 No improvement for 1 epoch(s)
|
125 |
+
2024-07-29 08:46:03,483 ----------------------------------------------------------------------------------------------------
|
126 |
+
2024-07-29 08:46:03,483 EPOCH 6
|
127 |
+
2024-07-29 08:46:55,366 batch 104/1044 - loss 2.00739912 - lr 0.0010 - time 51.88s
|
128 |
+
2024-07-29 08:47:49,416 batch 208/1044 - loss 2.01236906 - lr 0.0010 - time 105.93s
|
129 |
+
2024-07-29 08:48:40,631 batch 312/1044 - loss 2.00802403 - lr 0.0010 - time 157.15s
|
130 |
+
2024-07-29 08:49:31,859 batch 416/1044 - loss 2.00383683 - lr 0.0010 - time 208.38s
|
131 |
+
2024-07-29 08:50:24,038 batch 520/1044 - loss 2.00740076 - lr 0.0010 - time 260.55s
|
132 |
+
2024-07-29 08:51:15,147 batch 624/1044 - loss 2.00523553 - lr 0.0010 - time 311.66s
|
133 |
+
2024-07-29 08:52:08,144 batch 728/1044 - loss 2.00501477 - lr 0.0010 - time 364.66s
|
134 |
+
2024-07-29 08:53:01,139 batch 832/1044 - loss 2.00346529 - lr 0.0010 - time 417.66s
|
135 |
+
2024-07-29 08:53:52,167 batch 936/1044 - loss 2.00414460 - lr 0.0010 - time 468.68s
|
136 |
+
2024-07-29 08:54:44,524 batch 1040/1044 - loss 2.00255805 - lr 0.0010 - time 521.04s
|
137 |
+
2024-07-29 08:54:46,659 ----------------------------------------------------------------------------------------------------
|
138 |
+
2024-07-29 08:54:46,661 EPOCH 6 DONE
|
139 |
+
2024-07-29 08:55:12,782 TRAIN Loss: 2.0025
|
140 |
+
2024-07-29 08:55:12,782 DEV Loss: 3.3489
|
141 |
+
2024-07-29 08:55:12,782 DEV Perplexity: 28.4717
|
142 |
+
2024-07-29 08:55:12,782 New best score!
|
143 |
+
2024-07-29 08:55:12,783 ----------------------------------------------------------------------------------------------------
|
144 |
+
2024-07-29 08:55:12,783 EPOCH 7
|
145 |
+
2024-07-29 08:56:04,750 batch 104/1044 - loss 1.98695231 - lr 0.0010 - time 51.97s
|
146 |
+
2024-07-29 08:56:58,800 batch 208/1044 - loss 1.98767810 - lr 0.0010 - time 106.02s
|
147 |
+
2024-07-29 08:57:49,243 batch 312/1044 - loss 1.98459300 - lr 0.0010 - time 156.46s
|
148 |
+
2024-07-29 08:58:41,780 batch 416/1044 - loss 1.98503252 - lr 0.0010 - time 209.00s
|
149 |
+
2024-07-29 08:59:33,609 batch 520/1044 - loss 1.98710582 - lr 0.0010 - time 260.83s
|
150 |
+
2024-07-29 09:00:26,006 batch 624/1044 - loss 1.98528185 - lr 0.0010 - time 313.22s
|
151 |
+
2024-07-29 09:01:19,139 batch 728/1044 - loss 1.98337018 - lr 0.0010 - time 366.36s
|
152 |
+
2024-07-29 09:02:11,214 batch 832/1044 - loss 1.98256551 - lr 0.0010 - time 418.43s
|
153 |
+
2024-07-29 09:03:02,061 batch 936/1044 - loss 1.98131203 - lr 0.0010 - time 469.28s
|
154 |
+
2024-07-29 09:03:53,668 batch 1040/1044 - loss 1.97932312 - lr 0.0010 - time 520.88s
|
155 |
+
2024-07-29 09:03:55,909 ----------------------------------------------------------------------------------------------------
|
156 |
+
2024-07-29 09:03:55,910 EPOCH 7 DONE
|
157 |
+
2024-07-29 09:04:21,679 TRAIN Loss: 1.9796
|
158 |
+
2024-07-29 09:04:21,679 DEV Loss: 3.3571
|
159 |
+
2024-07-29 09:04:21,679 DEV Perplexity: 28.7050
|
160 |
+
2024-07-29 09:04:21,679 No improvement for 1 epoch(s)
|
161 |
+
2024-07-29 09:04:21,679 ----------------------------------------------------------------------------------------------------
|
162 |
+
2024-07-29 09:04:21,679 EPOCH 8
|
163 |
+
2024-07-29 09:05:13,500 batch 104/1044 - loss 1.97407123 - lr 0.0010 - time 51.82s
|
164 |
+
2024-07-29 09:06:04,321 batch 208/1044 - loss 1.96966393 - lr 0.0010 - time 102.64s
|
165 |
+
2024-07-29 09:06:55,085 batch 312/1044 - loss 1.96944196 - lr 0.0010 - time 153.41s
|
166 |
+
2024-07-29 09:07:47,563 batch 416/1044 - loss 1.96693789 - lr 0.0010 - time 205.88s
|
167 |
+
2024-07-29 09:08:40,188 batch 520/1044 - loss 1.96657811 - lr 0.0010 - time 258.51s
|
168 |
+
2024-07-29 09:09:32,010 batch 624/1044 - loss 1.96688818 - lr 0.0010 - time 310.33s
|
169 |
+
2024-07-29 09:10:22,905 batch 728/1044 - loss 1.96592610 - lr 0.0010 - time 361.23s
|
170 |
+
2024-07-29 09:11:15,842 batch 832/1044 - loss 1.96564289 - lr 0.0010 - time 414.16s
|
171 |
+
2024-07-29 09:12:07,382 batch 936/1044 - loss 1.96510702 - lr 0.0010 - time 465.70s
|
172 |
+
2024-07-29 09:13:00,098 batch 1040/1044 - loss 1.96494248 - lr 0.0010 - time 518.42s
|
173 |
+
2024-07-29 09:13:02,363 ----------------------------------------------------------------------------------------------------
|
174 |
+
2024-07-29 09:13:02,365 EPOCH 8 DONE
|
175 |
+
2024-07-29 09:13:28,243 TRAIN Loss: 1.9653
|
176 |
+
2024-07-29 09:13:28,244 DEV Loss: 3.2717
|
177 |
+
2024-07-29 09:13:28,244 DEV Perplexity: 26.3550
|
178 |
+
2024-07-29 09:13:28,244 New best score!
|
179 |
+
2024-07-29 09:13:28,245 ----------------------------------------------------------------------------------------------------
|
180 |
+
2024-07-29 09:13:28,245 EPOCH 9
|
181 |
+
2024-07-29 09:14:17,916 batch 104/1044 - loss 1.93107039 - lr 0.0010 - time 49.67s
|
182 |
+
2024-07-29 09:15:10,060 batch 208/1044 - loss 1.95099017 - lr 0.0010 - time 101.81s
|
183 |
+
2024-07-29 09:16:01,819 batch 312/1044 - loss 1.94943737 - lr 0.0010 - time 153.57s
|
184 |
+
2024-07-29 09:16:56,659 batch 416/1044 - loss 1.94723259 - lr 0.0010 - time 208.41s
|
185 |
+
2024-07-29 09:17:48,313 batch 520/1044 - loss 1.94754128 - lr 0.0010 - time 260.07s
|
186 |
+
2024-07-29 09:18:38,708 batch 624/1044 - loss 1.94901741 - lr 0.0010 - time 310.46s
|
187 |
+
2024-07-29 09:19:29,542 batch 728/1044 - loss 1.95013667 - lr 0.0010 - time 361.30s
|
188 |
+
2024-07-29 09:20:22,714 batch 832/1044 - loss 1.94866815 - lr 0.0010 - time 414.47s
|
189 |
+
2024-07-29 09:21:15,236 batch 936/1044 - loss 1.94871606 - lr 0.0010 - time 466.99s
|
190 |
+
2024-07-29 09:22:06,555 batch 1040/1044 - loss 1.94837562 - lr 0.0010 - time 518.31s
|
191 |
+
2024-07-29 09:22:08,570 ----------------------------------------------------------------------------------------------------
|
192 |
+
2024-07-29 09:22:08,572 EPOCH 9 DONE
|
193 |
+
2024-07-29 09:22:34,432 TRAIN Loss: 1.9484
|
194 |
+
2024-07-29 09:22:34,432 DEV Loss: 3.3895
|
195 |
+
2024-07-29 09:22:34,432 DEV Perplexity: 29.6497
|
196 |
+
2024-07-29 09:22:34,432 No improvement for 1 epoch(s)
|
197 |
+
2024-07-29 09:22:34,432 ----------------------------------------------------------------------------------------------------
|
198 |
+
2024-07-29 09:22:34,432 EPOCH 10
|
199 |
+
2024-07-29 09:23:25,550 batch 104/1044 - loss 1.93740847 - lr 0.0010 - time 51.12s
|
200 |
+
2024-07-29 09:24:14,975 batch 208/1044 - loss 1.94865602 - lr 0.0010 - time 100.54s
|
201 |
+
2024-07-29 09:25:08,386 batch 312/1044 - loss 1.93897269 - lr 0.0010 - time 153.95s
|
202 |
+
2024-07-29 09:26:01,085 batch 416/1044 - loss 1.93520124 - lr 0.0010 - time 206.65s
|
203 |
+
2024-07-29 09:26:53,620 batch 520/1044 - loss 1.93428783 - lr 0.0010 - time 259.19s
|
204 |
+
2024-07-29 09:27:46,957 batch 624/1044 - loss 1.93437176 - lr 0.0010 - time 312.52s
|
205 |
+
2024-07-29 09:28:39,693 batch 728/1044 - loss 1.93431406 - lr 0.0010 - time 365.26s
|
206 |
+
2024-07-29 09:29:31,536 batch 832/1044 - loss 1.93312064 - lr 0.0010 - time 417.10s
|
207 |
+
2024-07-29 09:30:23,577 batch 936/1044 - loss 1.93337018 - lr 0.0010 - time 469.14s
|
208 |
+
2024-07-29 09:31:15,446 batch 1040/1044 - loss 1.93256764 - lr 0.0010 - time 521.01s
|
209 |
+
2024-07-29 09:31:17,259 ----------------------------------------------------------------------------------------------------
|
210 |
+
2024-07-29 09:31:17,261 EPOCH 10 DONE
|
211 |
+
2024-07-29 09:31:43,257 TRAIN Loss: 1.9327
|
212 |
+
2024-07-29 09:31:43,257 DEV Loss: 3.4304
|
213 |
+
2024-07-29 09:31:43,257 DEV Perplexity: 30.8875
|
214 |
+
2024-07-29 09:31:43,257 No improvement for 2 epoch(s)
|
215 |
+
2024-07-29 09:31:43,257 ----------------------------------------------------------------------------------------------------
|
216 |
+
2024-07-29 09:31:43,257 Finished Training
|
217 |
+
2024-07-29 09:32:34,245 TEST Perplexity: 26.3855
|
218 |
+
2024-07-29 09:42:12,703 TEST BLEU = 3.28 38.5/5.9/1.0/0.5 (BP = 1.000 ratio = 1.000 hyp_len = 52 ref_len = 52)
|
models/de2en/character_end2end_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9091ae704923252f171c2b72acecdbe8a130534415536b779d5253c3b9ce7758
|
3 |
+
size 35809076
|
models/de2en/word_end2end_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 09:42:42,423 Training Model
|
3 |
+
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 09:42:42,423 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(22834, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(14303, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=14303, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-29 09:42:42,423 Training Hyperparameters:
|
27 |
+
2024-07-29 09:42:42,423 - max_epochs: 10
|
28 |
+
2024-07-29 09:42:42,423 - learning_rate: 0.001
|
29 |
+
2024-07-29 09:42:42,423 - batch_size: 128
|
30 |
+
2024-07-29 09:42:42,423 - patience: 5
|
31 |
+
2024-07-29 09:42:42,423 - scheduler_patience: 3
|
32 |
+
2024-07-29 09:42:42,423 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-29 09:42:42,423 Computational Parameters:
|
35 |
+
2024-07-29 09:42:42,423 - num_workers: 4
|
36 |
+
2024-07-29 09:42:42,423 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-29 09:42:42,423 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-29 09:42:42,423 Dataset Splits:
|
39 |
+
2024-07-29 09:42:42,423 - train: 133623 data points
|
40 |
+
2024-07-29 09:42:42,423 - dev: 19090 data points
|
41 |
+
2024-07-29 09:42:42,423 - test: 38179 data points
|
42 |
+
2024-07-29 09:42:42,424 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-29 09:42:42,424 EPOCH 1
|
44 |
+
2024-07-29 09:43:17,154 batch 104/1044 - loss 6.42034669 - lr 0.0010 - time 34.73s
|
45 |
+
2024-07-29 09:43:54,707 batch 208/1044 - loss 6.17063731 - lr 0.0010 - time 72.28s
|
46 |
+
2024-07-29 09:44:32,541 batch 312/1044 - loss 6.00517355 - lr 0.0010 - time 110.12s
|
47 |
+
2024-07-29 09:45:10,795 batch 416/1044 - loss 5.87077612 - lr 0.0010 - time 148.37s
|
48 |
+
2024-07-29 09:45:47,671 batch 520/1044 - loss 5.75463560 - lr 0.0010 - time 185.25s
|
49 |
+
2024-07-29 09:46:24,949 batch 624/1044 - loss 5.65824632 - lr 0.0010 - time 222.53s
|
50 |
+
2024-07-29 09:47:03,552 batch 728/1044 - loss 5.56939856 - lr 0.0010 - time 261.13s
|
51 |
+
2024-07-29 09:47:40,687 batch 832/1044 - loss 5.49128213 - lr 0.0010 - time 298.26s
|
52 |
+
2024-07-29 09:48:18,127 batch 936/1044 - loss 5.41966415 - lr 0.0010 - time 335.70s
|
53 |
+
2024-07-29 09:48:55,119 batch 1040/1044 - loss 5.35489992 - lr 0.0010 - time 372.70s
|
54 |
+
2024-07-29 09:48:56,680 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-29 09:48:56,681 EPOCH 1 DONE
|
56 |
+
2024-07-29 09:49:06,113 TRAIN Loss: 5.3525
|
57 |
+
2024-07-29 09:49:06,114 DEV Loss: 5.5692
|
58 |
+
2024-07-29 09:49:06,114 DEV Perplexity: 262.2315
|
59 |
+
2024-07-29 09:49:06,114 New best score!
|
60 |
+
2024-07-29 09:49:06,115 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-29 09:49:06,115 EPOCH 2
|
62 |
+
2024-07-29 09:49:41,222 batch 104/1044 - loss 4.62738995 - lr 0.0010 - time 35.11s
|
63 |
+
2024-07-29 09:50:17,864 batch 208/1044 - loss 4.59759969 - lr 0.0010 - time 71.75s
|
64 |
+
2024-07-29 09:50:53,411 batch 312/1044 - loss 4.57657494 - lr 0.0010 - time 107.30s
|
65 |
+
2024-07-29 09:51:31,209 batch 416/1044 - loss 4.54348163 - lr 0.0010 - time 145.09s
|
66 |
+
2024-07-29 09:52:11,697 batch 520/1044 - loss 4.51823422 - lr 0.0010 - time 185.58s
|
67 |
+
2024-07-29 09:52:48,926 batch 624/1044 - loss 4.49001330 - lr 0.0010 - time 222.81s
|
68 |
+
2024-07-29 09:53:24,588 batch 728/1044 - loss 4.46876206 - lr 0.0010 - time 258.47s
|
69 |
+
2024-07-29 09:54:02,468 batch 832/1044 - loss 4.44477118 - lr 0.0010 - time 296.35s
|
70 |
+
2024-07-29 09:54:39,911 batch 936/1044 - loss 4.42371725 - lr 0.0010 - time 333.80s
|
71 |
+
2024-07-29 09:55:16,492 batch 1040/1044 - loss 4.40068238 - lr 0.0010 - time 370.38s
|
72 |
+
2024-07-29 09:55:18,277 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-29 09:55:18,279 EPOCH 2 DONE
|
74 |
+
2024-07-29 09:55:27,546 TRAIN Loss: 4.3997
|
75 |
+
2024-07-29 09:55:27,546 DEV Loss: 5.2857
|
76 |
+
2024-07-29 09:55:27,546 DEV Perplexity: 197.4908
|
77 |
+
2024-07-29 09:55:27,546 New best score!
|
78 |
+
2024-07-29 09:55:27,547 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-29 09:55:27,547 EPOCH 3
|
80 |
+
2024-07-29 09:56:04,874 batch 104/1044 - loss 4.04292682 - lr 0.0010 - time 37.33s
|
81 |
+
2024-07-29 09:56:44,240 batch 208/1044 - loss 4.04458403 - lr 0.0010 - time 76.69s
|
82 |
+
2024-07-29 09:57:19,595 batch 312/1044 - loss 4.04015087 - lr 0.0010 - time 112.05s
|
83 |
+
2024-07-29 09:57:58,341 batch 416/1044 - loss 4.03473626 - lr 0.0010 - time 150.79s
|
84 |
+
2024-07-29 09:58:33,685 batch 520/1044 - loss 4.02294693 - lr 0.0010 - time 186.14s
|
85 |
+
2024-07-29 09:59:09,374 batch 624/1044 - loss 4.00945110 - lr 0.0010 - time 221.83s
|
86 |
+
2024-07-29 09:59:49,125 batch 728/1044 - loss 4.00042684 - lr 0.0010 - time 261.58s
|
87 |
+
2024-07-29 10:00:26,299 batch 832/1044 - loss 3.99049270 - lr 0.0010 - time 298.75s
|
88 |
+
2024-07-29 10:01:03,713 batch 936/1044 - loss 3.97934972 - lr 0.0010 - time 336.17s
|
89 |
+
2024-07-29 10:01:40,625 batch 1040/1044 - loss 3.96891846 - lr 0.0010 - time 373.08s
|
90 |
+
2024-07-29 10:01:41,787 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-29 10:01:41,789 EPOCH 3 DONE
|
92 |
+
2024-07-29 10:01:51,163 TRAIN Loss: 3.9687
|
93 |
+
2024-07-29 10:01:51,163 DEV Loss: 5.2440
|
94 |
+
2024-07-29 10:01:51,163 DEV Perplexity: 189.4295
|
95 |
+
2024-07-29 10:01:51,163 New best score!
|
96 |
+
2024-07-29 10:01:51,164 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-29 10:01:51,164 EPOCH 4
|
98 |
+
2024-07-29 10:02:31,057 batch 104/1044 - loss 3.74893653 - lr 0.0010 - time 39.89s
|
99 |
+
2024-07-29 10:03:05,331 batch 208/1044 - loss 3.75399486 - lr 0.0010 - time 74.17s
|
100 |
+
2024-07-29 10:03:41,466 batch 312/1044 - loss 3.75771751 - lr 0.0010 - time 110.30s
|
101 |
+
2024-07-29 10:04:15,960 batch 416/1044 - loss 3.75979321 - lr 0.0010 - time 144.80s
|
102 |
+
2024-07-29 10:04:55,428 batch 520/1044 - loss 3.75057765 - lr 0.0010 - time 184.26s
|
103 |
+
2024-07-29 10:05:33,137 batch 624/1044 - loss 3.74305481 - lr 0.0010 - time 221.97s
|
104 |
+
2024-07-29 10:06:09,059 batch 728/1044 - loss 3.73923583 - lr 0.0010 - time 257.89s
|
105 |
+
2024-07-29 10:06:47,012 batch 832/1044 - loss 3.73675085 - lr 0.0010 - time 295.85s
|
106 |
+
2024-07-29 10:07:23,641 batch 936/1044 - loss 3.73419790 - lr 0.0010 - time 332.48s
|
107 |
+
2024-07-29 10:07:58,748 batch 1040/1044 - loss 3.72953442 - lr 0.0010 - time 367.58s
|
108 |
+
2024-07-29 10:08:00,245 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-29 10:08:00,246 EPOCH 4 DONE
|
110 |
+
2024-07-29 10:08:09,716 TRAIN Loss: 3.7292
|
111 |
+
2024-07-29 10:08:09,717 DEV Loss: 5.1546
|
112 |
+
2024-07-29 10:08:09,717 DEV Perplexity: 173.2260
|
113 |
+
2024-07-29 10:08:09,717 New best score!
|
114 |
+
2024-07-29 10:08:09,718 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-29 10:08:09,718 EPOCH 5
|
116 |
+
2024-07-29 10:08:48,898 batch 104/1044 - loss 3.53810529 - lr 0.0010 - time 39.18s
|
117 |
+
2024-07-29 10:09:24,261 batch 208/1044 - loss 3.54713277 - lr 0.0010 - time 74.54s
|
118 |
+
2024-07-29 10:09:59,554 batch 312/1044 - loss 3.55520624 - lr 0.0010 - time 109.84s
|
119 |
+
2024-07-29 10:10:35,964 batch 416/1044 - loss 3.54529557 - lr 0.0010 - time 146.25s
|
120 |
+
2024-07-29 10:11:13,273 batch 520/1044 - loss 3.53952308 - lr 0.0010 - time 183.56s
|
121 |
+
2024-07-29 10:11:49,699 batch 624/1044 - loss 3.53902453 - lr 0.0010 - time 219.98s
|
122 |
+
2024-07-29 10:12:26,577 batch 728/1044 - loss 3.54207764 - lr 0.0010 - time 256.86s
|
123 |
+
2024-07-29 10:13:03,988 batch 832/1044 - loss 3.54191658 - lr 0.0010 - time 294.27s
|
124 |
+
2024-07-29 10:13:44,152 batch 936/1044 - loss 3.54287420 - lr 0.0010 - time 334.43s
|
125 |
+
2024-07-29 10:14:19,848 batch 1040/1044 - loss 3.54355186 - lr 0.0010 - time 370.13s
|
126 |
+
2024-07-29 10:14:21,679 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-29 10:14:21,680 EPOCH 5 DONE
|
128 |
+
2024-07-29 10:14:31,157 TRAIN Loss: 3.5436
|
129 |
+
2024-07-29 10:14:31,157 DEV Loss: 5.1595
|
130 |
+
2024-07-29 10:14:31,157 DEV Perplexity: 174.0773
|
131 |
+
2024-07-29 10:14:31,157 No improvement for 1 epoch(s)
|
132 |
+
2024-07-29 10:14:31,157 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-29 10:14:31,157 EPOCH 6
|
134 |
+
2024-07-29 10:15:09,004 batch 104/1044 - loss 3.37988193 - lr 0.0010 - time 37.85s
|
135 |
+
2024-07-29 10:15:46,449 batch 208/1044 - loss 3.39972965 - lr 0.0010 - time 75.29s
|
136 |
+
2024-07-29 10:16:23,877 batch 312/1044 - loss 3.41839841 - lr 0.0010 - time 112.72s
|
137 |
+
2024-07-29 10:17:02,860 batch 416/1044 - loss 3.42049147 - lr 0.0010 - time 151.70s
|
138 |
+
2024-07-29 10:17:39,715 batch 520/1044 - loss 3.42189572 - lr 0.0010 - time 188.56s
|
139 |
+
2024-07-29 10:18:16,287 batch 624/1044 - loss 3.41934290 - lr 0.0010 - time 225.13s
|
140 |
+
2024-07-29 10:18:49,350 batch 728/1044 - loss 3.42369204 - lr 0.0010 - time 258.19s
|
141 |
+
2024-07-29 10:19:27,406 batch 832/1044 - loss 3.42245102 - lr 0.0010 - time 296.25s
|
142 |
+
2024-07-29 10:20:04,324 batch 936/1044 - loss 3.42058108 - lr 0.0010 - time 333.17s
|
143 |
+
2024-07-29 10:20:39,261 batch 1040/1044 - loss 3.42255051 - lr 0.0010 - time 368.10s
|
144 |
+
2024-07-29 10:20:43,715 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-29 10:20:43,717 EPOCH 6 DONE
|
146 |
+
2024-07-29 10:20:53,217 TRAIN Loss: 3.4223
|
147 |
+
2024-07-29 10:20:53,218 DEV Loss: 5.1826
|
148 |
+
2024-07-29 10:20:53,218 DEV Perplexity: 178.1495
|
149 |
+
2024-07-29 10:20:53,218 No improvement for 2 epoch(s)
|
150 |
+
2024-07-29 10:20:53,218 ----------------------------------------------------------------------------------------------------
|
151 |
+
2024-07-29 10:20:53,218 EPOCH 7
|
152 |
+
2024-07-29 10:21:31,444 batch 104/1044 - loss 3.29632874 - lr 0.0010 - time 38.23s
|
153 |
+
2024-07-29 10:22:10,060 batch 208/1044 - loss 3.29179441 - lr 0.0010 - time 76.84s
|
154 |
+
2024-07-29 10:22:45,065 batch 312/1044 - loss 3.28852440 - lr 0.0010 - time 111.85s
|
155 |
+
2024-07-29 10:23:21,129 batch 416/1044 - loss 3.29654682 - lr 0.0010 - time 147.91s
|
156 |
+
2024-07-29 10:23:58,897 batch 520/1044 - loss 3.30062932 - lr 0.0010 - time 185.68s
|
157 |
+
2024-07-29 10:24:37,910 batch 624/1044 - loss 3.31254658 - lr 0.0010 - time 224.69s
|
158 |
+
2024-07-29 10:25:15,978 batch 728/1044 - loss 3.31376025 - lr 0.0010 - time 262.76s
|
159 |
+
2024-07-29 10:25:53,003 batch 832/1044 - loss 3.31953892 - lr 0.0010 - time 299.79s
|
160 |
+
2024-07-29 10:26:30,024 batch 936/1044 - loss 3.32268426 - lr 0.0010 - time 336.81s
|
161 |
+
2024-07-29 10:27:05,685 batch 1040/1044 - loss 3.32460238 - lr 0.0010 - time 372.47s
|
162 |
+
2024-07-29 10:27:06,955 ----------------------------------------------------------------------------------------------------
|
163 |
+
2024-07-29 10:27:06,957 EPOCH 7 DONE
|
164 |
+
2024-07-29 10:27:16,539 TRAIN Loss: 3.3246
|
165 |
+
2024-07-29 10:27:16,539 DEV Loss: 5.2310
|
166 |
+
2024-07-29 10:27:16,539 DEV Perplexity: 186.9724
|
167 |
+
2024-07-29 10:27:16,539 No improvement for 3 epoch(s)
|
168 |
+
2024-07-29 10:27:16,539 ----------------------------------------------------------------------------------------------------
|
169 |
+
2024-07-29 10:27:16,539 EPOCH 8
|
170 |
+
2024-07-29 10:27:55,681 batch 104/1044 - loss 3.18067933 - lr 0.0010 - time 39.14s
|
171 |
+
2024-07-29 10:28:30,973 batch 208/1044 - loss 3.20228673 - lr 0.0010 - time 74.43s
|
172 |
+
2024-07-29 10:29:06,064 batch 312/1044 - loss 3.20549937 - lr 0.0010 - time 109.53s
|
173 |
+
2024-07-29 10:29:43,870 batch 416/1044 - loss 3.21897588 - lr 0.0010 - time 147.33s
|
174 |
+
2024-07-29 10:30:19,159 batch 520/1044 - loss 3.22153870 - lr 0.0010 - time 182.62s
|
175 |
+
2024-07-29 10:30:55,565 batch 624/1044 - loss 3.22599725 - lr 0.0010 - time 219.03s
|
176 |
+
2024-07-29 10:31:33,714 batch 728/1044 - loss 3.22878759 - lr 0.0010 - time 257.18s
|
177 |
+
2024-07-29 10:32:10,440 batch 832/1044 - loss 3.23212968 - lr 0.0010 - time 293.90s
|
178 |
+
2024-07-29 10:32:48,422 batch 936/1044 - loss 3.23624962 - lr 0.0010 - time 331.88s
|
179 |
+
2024-07-29 10:33:24,964 batch 1040/1044 - loss 3.23659680 - lr 0.0010 - time 368.42s
|
180 |
+
2024-07-29 10:33:26,214 ----------------------------------------------------------------------------------------------------
|
181 |
+
2024-07-29 10:33:26,216 EPOCH 8 DONE
|
182 |
+
2024-07-29 10:33:35,755 TRAIN Loss: 3.2367
|
183 |
+
2024-07-29 10:33:35,756 DEV Loss: 5.2968
|
184 |
+
2024-07-29 10:33:35,756 DEV Perplexity: 199.6878
|
185 |
+
2024-07-29 10:33:35,756 No improvement for 4 epoch(s)
|
186 |
+
2024-07-29 10:33:35,756 ----------------------------------------------------------------------------------------------------
|
187 |
+
2024-07-29 10:33:35,756 EPOCH 9
|
188 |
+
2024-07-29 10:34:15,083 batch 104/1044 - loss 3.08033091 - lr 0.0001 - time 39.33s
|
189 |
+
2024-07-29 10:34:52,691 batch 208/1044 - loss 3.07522689 - lr 0.0001 - time 76.93s
|
190 |
+
2024-07-29 10:35:29,151 batch 312/1044 - loss 3.06626054 - lr 0.0001 - time 113.39s
|
191 |
+
2024-07-29 10:36:06,720 batch 416/1044 - loss 3.06839789 - lr 0.0001 - time 150.96s
|
192 |
+
2024-07-29 10:36:41,167 batch 520/1044 - loss 3.06539460 - lr 0.0001 - time 185.41s
|
193 |
+
2024-07-29 10:37:17,074 batch 624/1044 - loss 3.06574041 - lr 0.0001 - time 221.32s
|
194 |
+
2024-07-29 10:37:54,392 batch 728/1044 - loss 3.06843089 - lr 0.0001 - time 258.64s
|
195 |
+
2024-07-29 10:38:31,689 batch 832/1044 - loss 3.06777010 - lr 0.0001 - time 295.93s
|
196 |
+
2024-07-29 10:39:06,956 batch 936/1044 - loss 3.06646013 - lr 0.0001 - time 331.20s
|
197 |
+
2024-07-29 10:39:45,993 batch 1040/1044 - loss 3.06478271 - lr 0.0001 - time 370.24s
|
198 |
+
2024-07-29 10:39:47,096 ----------------------------------------------------------------------------------------------------
|
199 |
+
2024-07-29 10:39:47,098 EPOCH 9 DONE
|
200 |
+
2024-07-29 10:39:56,496 TRAIN Loss: 3.0646
|
201 |
+
2024-07-29 10:39:56,497 DEV Loss: 5.1945
|
202 |
+
2024-07-29 10:39:56,497 DEV Perplexity: 180.2739
|
203 |
+
2024-07-29 10:39:56,497 No improvement for 5 epoch(s)
|
204 |
+
2024-07-29 10:39:56,497 Patience reached: Terminating model training due to early stopping
|
205 |
+
2024-07-29 10:39:56,497 ----------------------------------------------------------------------------------------------------
|
206 |
+
2024-07-29 10:39:56,497 Finished Training
|
207 |
+
2024-07-29 10:40:14,449 TEST Perplexity: 173.0781
|
208 |
+
2024-07-29 10:49:34,588 TEST BLEU = 17.27 82.9/65.2/22.1/0.7 (BP = 1.000 ratio = 1.000 hyp_len = 70 ref_len = 70)
|
models/de2en/word_end2end_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19a3bd0f5aa6b92e25405bc92cf76065a2858c33d009ffa182cd554d5c71fffb
|
3 |
+
size 90483176
|
models/de2en/word_end2end_embeddings_without_attention/log.txt
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 10:50:04,754 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 10:50:04,754 Training Model
|
3 |
+
2024-07-29 10:50:04,754 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 10:50:04,754 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(22834, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(14303, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 1024, batch_first=True)
|
14 |
+
(hidden2vocab): Linear(in_features=1024, out_features=14303, bias=True)
|
15 |
+
(log_softmax): LogSoftmax(dim=-1)
|
16 |
+
)
|
17 |
+
)
|
18 |
+
2024-07-29 10:50:04,754 ----------------------------------------------------------------------------------------------------
|
19 |
+
2024-07-29 10:50:04,754 Training Hyperparameters:
|
20 |
+
2024-07-29 10:50:04,754 - max_epochs: 10
|
21 |
+
2024-07-29 10:50:04,754 - learning_rate: 0.001
|
22 |
+
2024-07-29 10:50:04,754 - batch_size: 128
|
23 |
+
2024-07-29 10:50:04,754 - patience: 5
|
24 |
+
2024-07-29 10:50:04,754 - scheduler_patience: 3
|
25 |
+
2024-07-29 10:50:04,754 - teacher_forcing_ratio: 0.5
|
26 |
+
2024-07-29 10:50:04,754 ----------------------------------------------------------------------------------------------------
|
27 |
+
2024-07-29 10:50:04,754 Computational Parameters:
|
28 |
+
2024-07-29 10:50:04,754 - num_workers: 4
|
29 |
+
2024-07-29 10:50:04,754 - device: device(type='cuda', index=0)
|
30 |
+
2024-07-29 10:50:04,754 ----------------------------------------------------------------------------------------------------
|
31 |
+
2024-07-29 10:50:04,754 Dataset Splits:
|
32 |
+
2024-07-29 10:50:04,754 - train: 133623 data points
|
33 |
+
2024-07-29 10:50:04,754 - dev: 19090 data points
|
34 |
+
2024-07-29 10:50:04,754 - test: 38179 data points
|
35 |
+
2024-07-29 10:50:04,754 ----------------------------------------------------------------------------------------------------
|
36 |
+
2024-07-29 10:50:04,754 EPOCH 1
|
37 |
+
2024-07-29 10:50:43,338 batch 104/1044 - loss 6.22291176 - lr 0.0010 - time 38.58s
|
38 |
+
2024-07-29 10:51:22,361 batch 208/1044 - loss 5.93824714 - lr 0.0010 - time 77.61s
|
39 |
+
2024-07-29 10:51:58,394 batch 312/1044 - loss 5.77446225 - lr 0.0010 - time 113.64s
|
40 |
+
2024-07-29 10:52:35,218 batch 416/1044 - loss 5.66314680 - lr 0.0010 - time 150.46s
|
41 |
+
2024-07-29 10:53:13,517 batch 520/1044 - loss 5.57905510 - lr 0.0010 - time 188.76s
|
42 |
+
2024-07-29 10:53:50,047 batch 624/1044 - loss 5.50804531 - lr 0.0010 - time 225.29s
|
43 |
+
2024-07-29 10:54:25,922 batch 728/1044 - loss 5.44633782 - lr 0.0010 - time 261.17s
|
44 |
+
2024-07-29 10:55:06,496 batch 832/1044 - loss 5.39363432 - lr 0.0010 - time 301.74s
|
45 |
+
2024-07-29 10:55:44,009 batch 936/1044 - loss 5.34465503 - lr 0.0010 - time 339.25s
|
46 |
+
2024-07-29 10:56:20,608 batch 1040/1044 - loss 5.30237069 - lr 0.0010 - time 375.85s
|
47 |
+
2024-07-29 10:56:21,834 ----------------------------------------------------------------------------------------------------
|
48 |
+
2024-07-29 10:56:21,835 EPOCH 1 DONE
|
49 |
+
2024-07-29 10:56:32,218 TRAIN Loss: 5.3006
|
50 |
+
2024-07-29 10:56:32,218 DEV Loss: 5.6956
|
51 |
+
2024-07-29 10:56:32,218 DEV Perplexity: 297.5674
|
52 |
+
2024-07-29 10:56:32,218 New best score!
|
53 |
+
2024-07-29 10:56:32,219 ----------------------------------------------------------------------------------------------------
|
54 |
+
2024-07-29 10:56:32,219 EPOCH 2
|
55 |
+
2024-07-29 10:57:08,475 batch 104/1044 - loss 4.79604481 - lr 0.0010 - time 36.26s
|
56 |
+
2024-07-29 10:57:45,165 batch 208/1044 - loss 4.77674592 - lr 0.0010 - time 72.95s
|
57 |
+
2024-07-29 10:58:21,266 batch 312/1044 - loss 4.76575011 - lr 0.0010 - time 109.05s
|
58 |
+
2024-07-29 10:58:58,850 batch 416/1044 - loss 4.74902598 - lr 0.0010 - time 146.63s
|
59 |
+
2024-07-29 10:59:38,377 batch 520/1044 - loss 4.73332064 - lr 0.0010 - time 186.16s
|
60 |
+
2024-07-29 11:00:17,866 batch 624/1044 - loss 4.71984003 - lr 0.0010 - time 225.65s
|
61 |
+
2024-07-29 11:00:54,011 batch 728/1044 - loss 4.70516888 - lr 0.0010 - time 261.79s
|
62 |
+
2024-07-29 11:01:34,485 batch 832/1044 - loss 4.69128365 - lr 0.0010 - time 302.27s
|
63 |
+
2024-07-29 11:02:12,777 batch 936/1044 - loss 4.67815600 - lr 0.0010 - time 340.56s
|
64 |
+
2024-07-29 11:02:51,261 batch 1040/1044 - loss 4.66313868 - lr 0.0010 - time 379.04s
|
65 |
+
2024-07-29 11:02:52,925 ----------------------------------------------------------------------------------------------------
|
66 |
+
2024-07-29 11:02:52,925 EPOCH 2 DONE
|
67 |
+
2024-07-29 11:03:03,081 TRAIN Loss: 4.6629
|
68 |
+
2024-07-29 11:03:03,081 DEV Loss: 5.4094
|
69 |
+
2024-07-29 11:03:03,081 DEV Perplexity: 223.5007
|
70 |
+
2024-07-29 11:03:03,081 New best score!
|
71 |
+
2024-07-29 11:03:03,082 ----------------------------------------------------------------------------------------------------
|
72 |
+
2024-07-29 11:03:03,082 EPOCH 3
|
73 |
+
2024-07-29 11:03:41,698 batch 104/1044 - loss 4.40099256 - lr 0.0010 - time 38.62s
|
74 |
+
2024-07-29 11:04:18,720 batch 208/1044 - loss 4.39715811 - lr 0.0010 - time 75.64s
|
75 |
+
2024-07-29 11:04:55,193 batch 312/1044 - loss 4.38702786 - lr 0.0010 - time 112.11s
|
76 |
+
2024-07-29 11:05:32,589 batch 416/1044 - loss 4.38275665 - lr 0.0010 - time 149.51s
|
77 |
+
2024-07-29 11:06:10,911 batch 520/1044 - loss 4.38342324 - lr 0.0010 - time 187.83s
|
78 |
+
2024-07-29 11:06:48,718 batch 624/1044 - loss 4.37729666 - lr 0.0010 - time 225.64s
|
79 |
+
2024-07-29 11:07:22,323 batch 728/1044 - loss 4.37210136 - lr 0.0010 - time 259.24s
|
80 |
+
2024-07-29 11:07:59,710 batch 832/1044 - loss 4.36537825 - lr 0.0010 - time 296.63s
|
81 |
+
2024-07-29 11:08:40,438 batch 936/1044 - loss 4.35723548 - lr 0.0010 - time 337.36s
|
82 |
+
2024-07-29 11:09:19,295 batch 1040/1044 - loss 4.35208639 - lr 0.0010 - time 376.21s
|
83 |
+
2024-07-29 11:09:20,974 ----------------------------------------------------------------------------------------------------
|
84 |
+
2024-07-29 11:09:20,976 EPOCH 3 DONE
|
85 |
+
2024-07-29 11:09:31,736 TRAIN Loss: 4.3516
|
86 |
+
2024-07-29 11:09:31,736 DEV Loss: 5.3795
|
87 |
+
2024-07-29 11:09:31,736 DEV Perplexity: 216.9040
|
88 |
+
2024-07-29 11:09:31,736 New best score!
|
89 |
+
2024-07-29 11:09:31,737 ----------------------------------------------------------------------------------------------------
|
90 |
+
2024-07-29 11:09:31,737 EPOCH 4
|
91 |
+
2024-07-29 11:10:09,632 batch 104/1044 - loss 4.12850516 - lr 0.0010 - time 37.89s
|
92 |
+
2024-07-29 11:10:48,903 batch 208/1044 - loss 4.12524635 - lr 0.0010 - time 77.17s
|
93 |
+
2024-07-29 11:11:25,547 batch 312/1044 - loss 4.14688384 - lr 0.0010 - time 113.81s
|
94 |
+
2024-07-29 11:12:07,723 batch 416/1044 - loss 4.15004679 - lr 0.0010 - time 155.99s
|
95 |
+
2024-07-29 11:12:42,092 batch 520/1044 - loss 4.15083167 - lr 0.0010 - time 190.35s
|
96 |
+
2024-07-29 11:13:18,356 batch 624/1044 - loss 4.14772662 - lr 0.0010 - time 226.62s
|
97 |
+
2024-07-29 11:13:57,278 batch 728/1044 - loss 4.14412260 - lr 0.0010 - time 265.54s
|
98 |
+
2024-07-29 11:14:34,685 batch 832/1044 - loss 4.14076383 - lr 0.0010 - time 302.95s
|
99 |
+
2024-07-29 11:15:11,405 batch 936/1044 - loss 4.13945565 - lr 0.0010 - time 339.67s
|
100 |
+
2024-07-29 11:15:47,876 batch 1040/1044 - loss 4.13794573 - lr 0.0010 - time 376.14s
|
101 |
+
2024-07-29 11:15:50,376 ----------------------------------------------------------------------------------------------------
|
102 |
+
2024-07-29 11:15:50,379 EPOCH 4 DONE
|
103 |
+
2024-07-29 11:16:00,736 TRAIN Loss: 4.1380
|
104 |
+
2024-07-29 11:16:00,736 DEV Loss: 5.2952
|
105 |
+
2024-07-29 11:16:00,736 DEV Perplexity: 199.3683
|
106 |
+
2024-07-29 11:16:00,736 New best score!
|
107 |
+
2024-07-29 11:16:00,737 ----------------------------------------------------------------------------------------------------
|
108 |
+
2024-07-29 11:16:00,737 EPOCH 5
|
109 |
+
2024-07-29 11:16:39,562 batch 104/1044 - loss 3.96431792 - lr 0.0010 - time 38.83s
|
110 |
+
2024-07-29 11:17:15,898 batch 208/1044 - loss 3.97457365 - lr 0.0010 - time 75.16s
|
111 |
+
2024-07-29 11:17:53,300 batch 312/1044 - loss 3.97400972 - lr 0.0010 - time 112.56s
|
112 |
+
2024-07-29 11:18:33,625 batch 416/1044 - loss 3.97165196 - lr 0.0010 - time 152.89s
|
113 |
+
2024-07-29 11:19:11,217 batch 520/1044 - loss 3.97641541 - lr 0.0010 - time 190.48s
|
114 |
+
2024-07-29 11:19:46,987 batch 624/1044 - loss 3.97542033 - lr 0.0010 - time 226.25s
|
115 |
+
2024-07-29 11:20:25,186 batch 728/1044 - loss 3.97692148 - lr 0.0010 - time 264.45s
|
116 |
+
2024-07-29 11:21:02,289 batch 832/1044 - loss 3.97921104 - lr 0.0010 - time 301.55s
|
117 |
+
2024-07-29 11:21:40,174 batch 936/1044 - loss 3.97930560 - lr 0.0010 - time 339.44s
|
118 |
+
2024-07-29 11:22:17,849 batch 1040/1044 - loss 3.97849798 - lr 0.0010 - time 377.11s
|
119 |
+
2024-07-29 11:22:19,179 ----------------------------------------------------------------------------------------------------
|
120 |
+
2024-07-29 11:22:19,180 EPOCH 5 DONE
|
121 |
+
2024-07-29 11:22:29,394 TRAIN Loss: 3.9785
|
122 |
+
2024-07-29 11:22:29,395 DEV Loss: 5.2711
|
123 |
+
2024-07-29 11:22:29,395 DEV Perplexity: 194.6379
|
124 |
+
2024-07-29 11:22:29,395 New best score!
|
125 |
+
2024-07-29 11:22:29,396 ----------------------------------------------------------------------------------------------------
|
126 |
+
2024-07-29 11:22:29,396 EPOCH 6
|
127 |
+
2024-07-29 11:23:06,490 batch 104/1044 - loss 3.79449145 - lr 0.0010 - time 37.09s
|
128 |
+
2024-07-29 11:23:44,605 batch 208/1044 - loss 3.80102162 - lr 0.0010 - time 75.21s
|
129 |
+
2024-07-29 11:24:20,606 batch 312/1044 - loss 3.82480847 - lr 0.0010 - time 111.21s
|
130 |
+
2024-07-29 11:25:00,470 batch 416/1044 - loss 3.82749171 - lr 0.0010 - time 151.07s
|
131 |
+
2024-07-29 11:25:39,446 batch 520/1044 - loss 3.83306203 - lr 0.0010 - time 190.05s
|
132 |
+
2024-07-29 11:26:16,751 batch 624/1044 - loss 3.83301514 - lr 0.0010 - time 227.36s
|
133 |
+
2024-07-29 11:26:56,003 batch 728/1044 - loss 3.83690645 - lr 0.0010 - time 266.61s
|
134 |
+
2024-07-29 11:27:31,400 batch 832/1044 - loss 3.83875727 - lr 0.0010 - time 302.00s
|
135 |
+
2024-07-29 11:28:08,468 batch 936/1044 - loss 3.83617867 - lr 0.0010 - time 339.07s
|
136 |
+
2024-07-29 11:28:45,647 batch 1040/1044 - loss 3.83976932 - lr 0.0010 - time 376.25s
|
137 |
+
2024-07-29 11:28:47,359 ----------------------------------------------------------------------------------------------------
|
138 |
+
2024-07-29 11:28:47,360 EPOCH 6 DONE
|
139 |
+
2024-07-29 11:28:57,993 TRAIN Loss: 3.8401
|
140 |
+
2024-07-29 11:28:57,994 DEV Loss: 5.2540
|
141 |
+
2024-07-29 11:28:57,994 DEV Perplexity: 191.3369
|
142 |
+
2024-07-29 11:28:57,994 New best score!
|
143 |
+
2024-07-29 11:28:57,995 ----------------------------------------------------------------------------------------------------
|
144 |
+
2024-07-29 11:28:57,995 EPOCH 7
|
145 |
+
2024-07-29 11:29:37,967 batch 104/1044 - loss 3.67017476 - lr 0.0010 - time 39.97s
|
146 |
+
2024-07-29 11:30:14,679 batch 208/1044 - loss 3.68186812 - lr 0.0010 - time 76.68s
|
147 |
+
2024-07-29 11:30:50,313 batch 312/1044 - loss 3.68655408 - lr 0.0010 - time 112.32s
|
148 |
+
2024-07-29 11:31:28,407 batch 416/1044 - loss 3.69328779 - lr 0.0010 - time 150.41s
|
149 |
+
2024-07-29 11:32:04,460 batch 520/1044 - loss 3.70375487 - lr 0.0010 - time 186.46s
|
150 |
+
2024-07-29 11:32:41,462 batch 624/1044 - loss 3.71066759 - lr 0.0010 - time 223.47s
|
151 |
+
2024-07-29 11:33:20,461 batch 728/1044 - loss 3.71740744 - lr 0.0010 - time 262.47s
|
152 |
+
2024-07-29 11:33:59,161 batch 832/1044 - loss 3.72158932 - lr 0.0010 - time 301.17s
|
153 |
+
2024-07-29 11:34:39,937 batch 936/1044 - loss 3.72798842 - lr 0.0010 - time 341.94s
|
154 |
+
2024-07-29 11:35:16,537 batch 1040/1044 - loss 3.73108852 - lr 0.0010 - time 378.54s
|
155 |
+
2024-07-29 11:35:17,759 ----------------------------------------------------------------------------------------------------
|
156 |
+
2024-07-29 11:35:17,760 EPOCH 7 DONE
|
157 |
+
2024-07-29 11:35:28,104 TRAIN Loss: 3.7309
|
158 |
+
2024-07-29 11:35:28,104 DEV Loss: 5.2985
|
159 |
+
2024-07-29 11:35:28,104 DEV Perplexity: 200.0320
|
160 |
+
2024-07-29 11:35:28,104 No improvement for 1 epoch(s)
|
161 |
+
2024-07-29 11:35:28,104 ----------------------------------------------------------------------------------------------------
|
162 |
+
2024-07-29 11:35:28,104 EPOCH 8
|
163 |
+
2024-07-29 11:36:04,164 batch 104/1044 - loss 3.58083070 - lr 0.0010 - time 36.06s
|
164 |
+
2024-07-29 11:36:43,289 batch 208/1044 - loss 3.58812592 - lr 0.0010 - time 75.18s
|
165 |
+
2024-07-29 11:37:21,968 batch 312/1044 - loss 3.59162074 - lr 0.0010 - time 113.86s
|
166 |
+
2024-07-29 11:38:00,978 batch 416/1044 - loss 3.59818534 - lr 0.0010 - time 152.87s
|
167 |
+
2024-07-29 11:38:37,516 batch 520/1044 - loss 3.60526753 - lr 0.0010 - time 189.41s
|
168 |
+
2024-07-29 11:39:14,825 batch 624/1044 - loss 3.61212452 - lr 0.0010 - time 226.72s
|
169 |
+
2024-07-29 11:39:53,312 batch 728/1044 - loss 3.61821994 - lr 0.0010 - time 265.21s
|
170 |
+
2024-07-29 11:40:29,316 batch 832/1044 - loss 3.62470438 - lr 0.0010 - time 301.21s
|
171 |
+
2024-07-29 11:41:04,349 batch 936/1044 - loss 3.62845350 - lr 0.0010 - time 336.25s
|
172 |
+
2024-07-29 11:41:44,512 batch 1040/1044 - loss 3.63141605 - lr 0.0010 - time 376.41s
|
173 |
+
2024-07-29 11:41:46,063 ----------------------------------------------------------------------------------------------------
|
174 |
+
2024-07-29 11:41:46,064 EPOCH 8 DONE
|
175 |
+
2024-07-29 11:41:56,359 TRAIN Loss: 3.6315
|
176 |
+
2024-07-29 11:41:56,359 DEV Loss: 5.3609
|
177 |
+
2024-07-29 11:41:56,359 DEV Perplexity: 212.9260
|
178 |
+
2024-07-29 11:41:56,359 No improvement for 2 epoch(s)
|
179 |
+
2024-07-29 11:41:56,359 ----------------------------------------------------------------------------------------------------
|
180 |
+
2024-07-29 11:41:56,359 EPOCH 9
|
181 |
+
2024-07-29 11:42:36,957 batch 104/1044 - loss 3.50648502 - lr 0.0010 - time 40.60s
|
182 |
+
2024-07-29 11:43:13,079 batch 208/1044 - loss 3.50864311 - lr 0.0010 - time 76.72s
|
183 |
+
2024-07-29 11:43:51,452 batch 312/1044 - loss 3.51420171 - lr 0.0010 - time 115.09s
|
184 |
+
2024-07-29 11:44:29,419 batch 416/1044 - loss 3.51720954 - lr 0.0010 - time 153.06s
|
185 |
+
2024-07-29 11:45:06,635 batch 520/1044 - loss 3.52723735 - lr 0.0010 - time 190.28s
|
186 |
+
2024-07-29 11:45:42,151 batch 624/1044 - loss 3.53320972 - lr 0.0010 - time 225.79s
|
187 |
+
2024-07-29 11:46:21,796 batch 728/1044 - loss 3.53833950 - lr 0.0010 - time 265.44s
|
188 |
+
2024-07-29 11:46:58,935 batch 832/1044 - loss 3.54215628 - lr 0.0010 - time 302.58s
|
189 |
+
2024-07-29 11:47:38,690 batch 936/1044 - loss 3.54798296 - lr 0.0010 - time 342.33s
|
190 |
+
2024-07-29 11:48:14,653 batch 1040/1044 - loss 3.55458613 - lr 0.0010 - time 378.29s
|
191 |
+
2024-07-29 11:48:15,983 ----------------------------------------------------------------------------------------------------
|
192 |
+
2024-07-29 11:48:15,985 EPOCH 9 DONE
|
193 |
+
2024-07-29 11:48:26,504 TRAIN Loss: 3.5546
|
194 |
+
2024-07-29 11:48:26,505 DEV Loss: 5.3435
|
195 |
+
2024-07-29 11:48:26,505 DEV Perplexity: 209.2481
|
196 |
+
2024-07-29 11:48:26,505 No improvement for 3 epoch(s)
|
197 |
+
2024-07-29 11:48:26,505 ----------------------------------------------------------------------------------------------------
|
198 |
+
2024-07-29 11:48:26,505 EPOCH 10
|
199 |
+
2024-07-29 11:49:02,899 batch 104/1044 - loss 3.40771045 - lr 0.0010 - time 36.39s
|
200 |
+
2024-07-29 11:49:44,040 batch 208/1044 - loss 3.42370991 - lr 0.0010 - time 77.53s
|
201 |
+
2024-07-29 11:50:19,449 batch 312/1044 - loss 3.42847678 - lr 0.0010 - time 112.94s
|
202 |
+
2024-07-29 11:50:57,004 batch 416/1044 - loss 3.43541002 - lr 0.0010 - time 150.50s
|
203 |
+
2024-07-29 11:51:35,579 batch 520/1044 - loss 3.43718368 - lr 0.0010 - time 189.07s
|
204 |
+
2024-07-29 11:52:13,930 batch 624/1044 - loss 3.44270241 - lr 0.0010 - time 227.43s
|
205 |
+
2024-07-29 11:52:50,874 batch 728/1044 - loss 3.44929790 - lr 0.0010 - time 264.37s
|
206 |
+
2024-07-29 11:53:27,201 batch 832/1044 - loss 3.45885872 - lr 0.0010 - time 300.70s
|
207 |
+
2024-07-29 11:54:05,960 batch 936/1044 - loss 3.46455428 - lr 0.0010 - time 339.46s
|
208 |
+
2024-07-29 11:54:43,515 batch 1040/1044 - loss 3.47035516 - lr 0.0010 - time 377.01s
|
209 |
+
2024-07-29 11:54:44,998 ----------------------------------------------------------------------------------------------------
|
210 |
+
2024-07-29 11:54:44,999 EPOCH 10 DONE
|
211 |
+
2024-07-29 11:54:55,213 TRAIN Loss: 3.4704
|
212 |
+
2024-07-29 11:54:55,213 DEV Loss: 5.3470
|
213 |
+
2024-07-29 11:54:55,213 DEV Perplexity: 209.9855
|
214 |
+
2024-07-29 11:54:55,213 No improvement for 4 epoch(s)
|
215 |
+
2024-07-29 11:54:55,213 ----------------------------------------------------------------------------------------------------
|
216 |
+
2024-07-29 11:54:55,213 Finished Training
|
217 |
+
2024-07-29 11:55:15,371 TEST Perplexity: 190.6175
|
218 |
+
2024-07-29 11:57:59,233 TEST BLEU = 18.21 79.5/63.2/18.6/1.2 (BP = 1.000 ratio = 1.000 hyp_len = 88 ref_len = 88)
|
models/de2en/word_end2end_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd1c7b481b8632ec220efb8a62516857775a1b8fc129b640d1ecf75a643701fe
|
3 |
+
size 139401460
|
models/de2en/word_word2vec_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 11:58:09,645 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 11:58:09,645 Training Model
|
3 |
+
2024-07-29 11:58:09,645 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 11:58:09,645 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(22834, 300, padding_idx=22829)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(14303, 300, padding_idx=14298)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=14303, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-29 11:58:09,645 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-29 11:58:09,645 Training Hyperparameters:
|
27 |
+
2024-07-29 11:58:09,645 - max_epochs: 10
|
28 |
+
2024-07-29 11:58:09,645 - learning_rate: 0.001
|
29 |
+
2024-07-29 11:58:09,645 - batch_size: 128
|
30 |
+
2024-07-29 11:58:09,645 - patience: 5
|
31 |
+
2024-07-29 11:58:09,645 - scheduler_patience: 3
|
32 |
+
2024-07-29 11:58:09,645 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-29 11:58:09,645 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-29 11:58:09,645 Computational Parameters:
|
35 |
+
2024-07-29 11:58:09,645 - num_workers: 4
|
36 |
+
2024-07-29 11:58:09,645 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-29 11:58:09,645 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-29 11:58:09,645 Dataset Splits:
|
39 |
+
2024-07-29 11:58:09,645 - train: 133623 data points
|
40 |
+
2024-07-29 11:58:09,645 - dev: 19090 data points
|
41 |
+
2024-07-29 11:58:09,645 - test: 38179 data points
|
42 |
+
2024-07-29 11:58:09,645 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-29 11:58:09,645 EPOCH 1
|
44 |
+
2024-07-29 11:58:44,611 batch 104/1044 - loss 6.23477593 - lr 0.0010 - time 34.97s
|
45 |
+
2024-07-29 11:59:21,123 batch 208/1044 - loss 5.90663943 - lr 0.0010 - time 71.48s
|
46 |
+
2024-07-29 11:59:59,484 batch 312/1044 - loss 5.68101043 - lr 0.0010 - time 109.84s
|
47 |
+
2024-07-29 12:00:33,236 batch 416/1044 - loss 5.50519204 - lr 0.0010 - time 143.59s
|
48 |
+
2024-07-29 12:01:07,828 batch 520/1044 - loss 5.35595245 - lr 0.0010 - time 178.18s
|
49 |
+
2024-07-29 12:01:41,584 batch 624/1044 - loss 5.23173786 - lr 0.0010 - time 211.94s
|
50 |
+
2024-07-29 12:02:16,655 batch 728/1044 - loss 5.12235606 - lr 0.0010 - time 247.01s
|
51 |
+
2024-07-29 12:02:54,702 batch 832/1044 - loss 5.03077019 - lr 0.0010 - time 285.06s
|
52 |
+
2024-07-29 12:03:31,197 batch 936/1044 - loss 4.94944220 - lr 0.0010 - time 321.55s
|
53 |
+
2024-07-29 12:04:06,838 batch 1040/1044 - loss 4.87826125 - lr 0.0010 - time 357.19s
|
54 |
+
2024-07-29 12:04:08,050 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-29 12:04:08,052 EPOCH 1 DONE
|
56 |
+
2024-07-29 12:04:17,660 TRAIN Loss: 4.8755
|
57 |
+
2024-07-29 12:04:17,660 DEV Loss: 5.5778
|
58 |
+
2024-07-29 12:04:17,660 DEV Perplexity: 264.4847
|
59 |
+
2024-07-29 12:04:17,660 New best score!
|
60 |
+
2024-07-29 12:04:17,661 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-29 12:04:17,661 EPOCH 2
|
62 |
+
2024-07-29 12:04:52,361 batch 104/1044 - loss 4.09781339 - lr 0.0010 - time 34.70s
|
63 |
+
2024-07-29 12:05:29,381 batch 208/1044 - loss 4.09235569 - lr 0.0010 - time 71.72s
|
64 |
+
2024-07-29 12:06:03,007 batch 312/1044 - loss 4.07521377 - lr 0.0010 - time 105.35s
|
65 |
+
2024-07-29 12:06:41,433 batch 416/1044 - loss 4.05366527 - lr 0.0010 - time 143.77s
|
66 |
+
2024-07-29 12:07:16,615 batch 520/1044 - loss 4.04409537 - lr 0.0010 - time 178.95s
|
67 |
+
2024-07-29 12:07:54,402 batch 624/1044 - loss 4.03171358 - lr 0.0010 - time 216.74s
|
68 |
+
2024-07-29 12:08:30,978 batch 728/1044 - loss 4.01387555 - lr 0.0010 - time 253.32s
|
69 |
+
2024-07-29 12:09:06,648 batch 832/1044 - loss 4.00238766 - lr 0.0010 - time 288.99s
|
70 |
+
2024-07-29 12:09:40,421 batch 936/1044 - loss 3.98835417 - lr 0.0010 - time 322.76s
|
71 |
+
2024-07-29 12:10:14,835 batch 1040/1044 - loss 3.97300559 - lr 0.0010 - time 357.17s
|
72 |
+
2024-07-29 12:10:16,312 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-29 12:10:16,313 EPOCH 2 DONE
|
74 |
+
2024-07-29 12:10:25,715 TRAIN Loss: 3.9725
|
75 |
+
2024-07-29 12:10:25,716 DEV Loss: 5.3532
|
76 |
+
2024-07-29 12:10:25,716 DEV Perplexity: 211.2824
|
77 |
+
2024-07-29 12:10:25,716 New best score!
|
78 |
+
2024-07-29 12:10:25,717 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-29 12:10:25,717 EPOCH 3
|
80 |
+
2024-07-29 12:11:00,721 batch 104/1044 - loss 3.74404566 - lr 0.0010 - time 35.00s
|
81 |
+
2024-07-29 12:11:37,000 batch 208/1044 - loss 3.74039719 - lr 0.0010 - time 71.28s
|
82 |
+
2024-07-29 12:12:12,689 batch 312/1044 - loss 3.73733081 - lr 0.0010 - time 106.97s
|
83 |
+
2024-07-29 12:12:50,036 batch 416/1044 - loss 3.73595966 - lr 0.0010 - time 144.32s
|
84 |
+
2024-07-29 12:13:25,963 batch 520/1044 - loss 3.73209848 - lr 0.0010 - time 180.25s
|
85 |
+
2024-07-29 12:14:02,705 batch 624/1044 - loss 3.72517045 - lr 0.0010 - time 216.99s
|
86 |
+
2024-07-29 12:14:37,050 batch 728/1044 - loss 3.71656788 - lr 0.0010 - time 251.33s
|
87 |
+
2024-07-29 12:15:14,430 batch 832/1044 - loss 3.71199233 - lr 0.0010 - time 288.71s
|
88 |
+
2024-07-29 12:15:47,540 batch 936/1044 - loss 3.70660474 - lr 0.0010 - time 321.82s
|
89 |
+
2024-07-29 12:16:24,282 batch 1040/1044 - loss 3.70128052 - lr 0.0010 - time 358.56s
|
90 |
+
2024-07-29 12:16:25,556 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-29 12:16:25,557 EPOCH 3 DONE
|
92 |
+
2024-07-29 12:16:34,855 TRAIN Loss: 3.7001
|
93 |
+
2024-07-29 12:16:34,855 DEV Loss: 5.5426
|
94 |
+
2024-07-29 12:16:34,855 DEV Perplexity: 255.3533
|
95 |
+
2024-07-29 12:16:34,855 No improvement for 1 epoch(s)
|
96 |
+
2024-07-29 12:16:34,855 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-29 12:16:34,855 EPOCH 4
|
98 |
+
2024-07-29 12:17:08,897 batch 104/1044 - loss 3.50534952 - lr 0.0010 - time 34.04s
|
99 |
+
2024-07-29 12:17:44,935 batch 208/1044 - loss 3.52391773 - lr 0.0010 - time 70.08s
|
100 |
+
2024-07-29 12:18:19,368 batch 312/1044 - loss 3.52871704 - lr 0.0010 - time 104.51s
|
101 |
+
2024-07-29 12:18:54,723 batch 416/1044 - loss 3.54154933 - lr 0.0010 - time 139.87s
|
102 |
+
2024-07-29 12:19:31,551 batch 520/1044 - loss 3.54263736 - lr 0.0010 - time 176.70s
|
103 |
+
2024-07-29 12:20:06,781 batch 624/1044 - loss 3.54425060 - lr 0.0010 - time 211.93s
|
104 |
+
2024-07-29 12:20:42,762 batch 728/1044 - loss 3.54463039 - lr 0.0010 - time 247.91s
|
105 |
+
2024-07-29 12:21:17,502 batch 832/1044 - loss 3.54627667 - lr 0.0010 - time 282.65s
|
106 |
+
2024-07-29 12:21:55,127 batch 936/1044 - loss 3.54532271 - lr 0.0010 - time 320.27s
|
107 |
+
2024-07-29 12:22:29,585 batch 1040/1044 - loss 3.54458958 - lr 0.0010 - time 354.73s
|
108 |
+
2024-07-29 12:22:31,086 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-29 12:22:31,088 EPOCH 4 DONE
|
110 |
+
2024-07-29 12:22:40,420 TRAIN Loss: 3.5442
|
111 |
+
2024-07-29 12:22:40,421 DEV Loss: 5.3866
|
112 |
+
2024-07-29 12:22:40,421 DEV Perplexity: 218.4686
|
113 |
+
2024-07-29 12:22:40,421 No improvement for 2 epoch(s)
|
114 |
+
2024-07-29 12:22:40,421 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-29 12:22:40,421 EPOCH 5
|
116 |
+
2024-07-29 12:23:17,038 batch 104/1044 - loss 3.42211845 - lr 0.0010 - time 36.62s
|
117 |
+
2024-07-29 12:23:52,549 batch 208/1044 - loss 3.42455010 - lr 0.0010 - time 72.13s
|
118 |
+
2024-07-29 12:24:30,559 batch 312/1044 - loss 3.43286339 - lr 0.0010 - time 110.14s
|
119 |
+
2024-07-29 12:25:08,089 batch 416/1044 - loss 3.43605063 - lr 0.0010 - time 147.67s
|
120 |
+
2024-07-29 12:25:44,865 batch 520/1044 - loss 3.43598771 - lr 0.0010 - time 184.44s
|
121 |
+
2024-07-29 12:26:20,668 batch 624/1044 - loss 3.43451708 - lr 0.0010 - time 220.25s
|
122 |
+
2024-07-29 12:26:54,105 batch 728/1044 - loss 3.43570769 - lr 0.0010 - time 253.68s
|
123 |
+
2024-07-29 12:27:28,635 batch 832/1044 - loss 3.43511326 - lr 0.0010 - time 288.21s
|
124 |
+
2024-07-29 12:28:01,952 batch 936/1044 - loss 3.43410573 - lr 0.0010 - time 321.53s
|
125 |
+
2024-07-29 12:28:37,108 batch 1040/1044 - loss 3.43386037 - lr 0.0010 - time 356.69s
|
126 |
+
2024-07-29 12:28:39,003 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-29 12:28:39,004 EPOCH 5 DONE
|
128 |
+
2024-07-29 12:28:48,549 TRAIN Loss: 3.4338
|
129 |
+
2024-07-29 12:28:48,550 DEV Loss: 5.4090
|
130 |
+
2024-07-29 12:28:48,550 DEV Perplexity: 223.4004
|
131 |
+
2024-07-29 12:28:48,550 No improvement for 3 epoch(s)
|
132 |
+
2024-07-29 12:28:48,550 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-29 12:28:48,550 EPOCH 6
|
134 |
+
2024-07-29 12:29:26,204 batch 104/1044 - loss 3.30328703 - lr 0.0010 - time 37.65s
|
135 |
+
2024-07-29 12:29:59,628 batch 208/1044 - loss 3.31942885 - lr 0.0010 - time 71.08s
|
136 |
+
2024-07-29 12:30:35,397 batch 312/1044 - loss 3.33565913 - lr 0.0010 - time 106.85s
|
137 |
+
2024-07-29 12:31:10,660 batch 416/1044 - loss 3.34290816 - lr 0.0010 - time 142.11s
|
138 |
+
2024-07-29 12:31:47,945 batch 520/1044 - loss 3.34171666 - lr 0.0010 - time 179.40s
|
139 |
+
2024-07-29 12:32:22,995 batch 624/1044 - loss 3.33953745 - lr 0.0010 - time 214.45s
|
140 |
+
2024-07-29 12:32:57,161 batch 728/1044 - loss 3.34280647 - lr 0.0010 - time 248.61s
|
141 |
+
2024-07-29 12:33:34,037 batch 832/1044 - loss 3.33969522 - lr 0.0010 - time 285.49s
|
142 |
+
2024-07-29 12:34:10,202 batch 936/1044 - loss 3.34106811 - lr 0.0010 - time 321.65s
|
143 |
+
2024-07-29 12:34:45,150 batch 1040/1044 - loss 3.34348369 - lr 0.0010 - time 356.60s
|
144 |
+
2024-07-29 12:34:46,636 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-29 12:34:46,638 EPOCH 6 DONE
|
146 |
+
2024-07-29 12:34:55,993 TRAIN Loss: 3.3433
|
147 |
+
2024-07-29 12:34:55,993 DEV Loss: 5.3104
|
148 |
+
2024-07-29 12:34:55,993 DEV Perplexity: 202.4232
|
149 |
+
2024-07-29 12:34:55,993 New best score!
|
150 |
+
2024-07-29 12:34:55,995 ----------------------------------------------------------------------------------------------------
|
151 |
+
2024-07-29 12:34:55,995 EPOCH 7
|
152 |
+
2024-07-29 12:35:32,412 batch 104/1044 - loss 3.25542786 - lr 0.0010 - time 36.42s
|
153 |
+
2024-07-29 12:36:09,058 batch 208/1044 - loss 3.26068776 - lr 0.0010 - time 73.06s
|
154 |
+
2024-07-29 12:36:42,551 batch 312/1044 - loss 3.25378684 - lr 0.0010 - time 106.56s
|
155 |
+
2024-07-29 12:37:18,686 batch 416/1044 - loss 3.26140341 - lr 0.0010 - time 142.69s
|
156 |
+
2024-07-29 12:37:58,475 batch 520/1044 - loss 3.26765249 - lr 0.0010 - time 182.48s
|
157 |
+
2024-07-29 12:38:31,930 batch 624/1044 - loss 3.27660421 - lr 0.0010 - time 215.94s
|
158 |
+
2024-07-29 12:39:08,947 batch 728/1044 - loss 3.27895282 - lr 0.0010 - time 252.95s
|
159 |
+
2024-07-29 12:39:42,229 batch 832/1044 - loss 3.27778179 - lr 0.0010 - time 286.23s
|
160 |
+
2024-07-29 12:40:16,475 batch 936/1044 - loss 3.28369047 - lr 0.0010 - time 320.48s
|
161 |
+
2024-07-29 12:40:52,527 batch 1040/1044 - loss 3.28335924 - lr 0.0010 - time 356.53s
|
162 |
+
2024-07-29 12:40:53,803 ----------------------------------------------------------------------------------------------------
|
163 |
+
2024-07-29 12:40:53,804 EPOCH 7 DONE
|
164 |
+
2024-07-29 12:41:03,158 TRAIN Loss: 3.2833
|
165 |
+
2024-07-29 12:41:03,159 DEV Loss: 5.3706
|
166 |
+
2024-07-29 12:41:03,159 DEV Perplexity: 214.9955
|
167 |
+
2024-07-29 12:41:03,159 No improvement for 1 epoch(s)
|
168 |
+
2024-07-29 12:41:03,159 ----------------------------------------------------------------------------------------------------
|
169 |
+
2024-07-29 12:41:03,159 EPOCH 8
|
170 |
+
2024-07-29 12:41:37,683 batch 104/1044 - loss 3.19396622 - lr 0.0010 - time 34.52s
|
171 |
+
2024-07-29 12:42:12,337 batch 208/1044 - loss 3.19424989 - lr 0.0010 - time 69.18s
|
172 |
+
2024-07-29 12:42:48,787 batch 312/1044 - loss 3.20324424 - lr 0.0010 - time 105.63s
|
173 |
+
2024-07-29 12:43:25,383 batch 416/1044 - loss 3.21497673 - lr 0.0010 - time 142.22s
|
174 |
+
2024-07-29 12:43:59,181 batch 520/1044 - loss 3.21523826 - lr 0.0010 - time 176.02s
|
175 |
+
2024-07-29 12:44:34,715 batch 624/1044 - loss 3.22076072 - lr 0.0010 - time 211.56s
|
176 |
+
2024-07-29 12:45:12,483 batch 728/1044 - loss 3.22582588 - lr 0.0010 - time 249.32s
|
177 |
+
2024-07-29 12:45:50,022 batch 832/1044 - loss 3.22324498 - lr 0.0010 - time 286.86s
|
178 |
+
2024-07-29 12:46:23,627 batch 936/1044 - loss 3.22702305 - lr 0.0010 - time 320.47s
|
179 |
+
2024-07-29 12:46:58,993 batch 1040/1044 - loss 3.22621578 - lr 0.0010 - time 355.83s
|
180 |
+
2024-07-29 12:47:00,548 ----------------------------------------------------------------------------------------------------
|
181 |
+
2024-07-29 12:47:00,550 EPOCH 8 DONE
|
182 |
+
2024-07-29 12:47:10,260 TRAIN Loss: 3.2260
|
183 |
+
2024-07-29 12:47:10,261 DEV Loss: 5.3901
|
184 |
+
2024-07-29 12:47:10,261 DEV Perplexity: 219.2285
|
185 |
+
2024-07-29 12:47:10,261 No improvement for 2 epoch(s)
|
186 |
+
2024-07-29 12:47:10,261 ----------------------------------------------------------------------------------------------------
|
187 |
+
2024-07-29 12:47:10,261 EPOCH 9
|
188 |
+
2024-07-29 12:47:44,939 batch 104/1044 - loss 3.15840583 - lr 0.0010 - time 34.68s
|
189 |
+
2024-07-29 12:48:19,312 batch 208/1044 - loss 3.15797923 - lr 0.0010 - time 69.05s
|
190 |
+
2024-07-29 12:48:53,400 batch 312/1044 - loss 3.15353966 - lr 0.0010 - time 103.14s
|
191 |
+
2024-07-29 12:49:28,875 batch 416/1044 - loss 3.16241378 - lr 0.0010 - time 138.61s
|
192 |
+
2024-07-29 12:50:04,666 batch 520/1044 - loss 3.16788850 - lr 0.0010 - time 174.40s
|
193 |
+
2024-07-29 12:50:44,607 batch 624/1044 - loss 3.16955657 - lr 0.0010 - time 214.35s
|
194 |
+
2024-07-29 12:51:19,767 batch 728/1044 - loss 3.17281829 - lr 0.0010 - time 249.51s
|
195 |
+
2024-07-29 12:51:53,684 batch 832/1044 - loss 3.17490795 - lr 0.0010 - time 283.42s
|
196 |
+
2024-07-29 12:52:29,149 batch 936/1044 - loss 3.17496411 - lr 0.0010 - time 318.89s
|
197 |
+
2024-07-29 12:53:04,563 batch 1040/1044 - loss 3.17747010 - lr 0.0010 - time 354.30s
|
198 |
+
2024-07-29 12:53:05,916 ----------------------------------------------------------------------------------------------------
|
199 |
+
2024-07-29 12:53:05,917 EPOCH 9 DONE
|
200 |
+
2024-07-29 12:53:15,201 TRAIN Loss: 3.1776
|
201 |
+
2024-07-29 12:53:15,202 DEV Loss: 5.3988
|
202 |
+
2024-07-29 12:53:15,202 DEV Perplexity: 221.1306
|
203 |
+
2024-07-29 12:53:15,202 No improvement for 3 epoch(s)
|
204 |
+
2024-07-29 12:53:15,202 ----------------------------------------------------------------------------------------------------
|
205 |
+
2024-07-29 12:53:15,202 EPOCH 10
|
206 |
+
2024-07-29 12:53:50,139 batch 104/1044 - loss 3.10803436 - lr 0.0010 - time 34.94s
|
207 |
+
2024-07-29 12:54:24,571 batch 208/1044 - loss 3.11205787 - lr 0.0010 - time 69.37s
|
208 |
+
2024-07-29 12:54:59,226 batch 312/1044 - loss 3.11798640 - lr 0.0010 - time 104.02s
|
209 |
+
2024-07-29 12:55:35,599 batch 416/1044 - loss 3.12687216 - lr 0.0010 - time 140.40s
|
210 |
+
2024-07-29 12:56:11,246 batch 520/1044 - loss 3.12420961 - lr 0.0010 - time 176.04s
|
211 |
+
2024-07-29 12:56:49,229 batch 624/1044 - loss 3.12595103 - lr 0.0010 - time 214.03s
|
212 |
+
2024-07-29 12:57:25,312 batch 728/1044 - loss 3.12430468 - lr 0.0010 - time 250.11s
|
213 |
+
2024-07-29 12:58:00,840 batch 832/1044 - loss 3.12859650 - lr 0.0010 - time 285.64s
|
214 |
+
2024-07-29 12:58:37,066 batch 936/1044 - loss 3.13335595 - lr 0.0010 - time 321.86s
|
215 |
+
2024-07-29 12:59:12,656 batch 1040/1044 - loss 3.13755236 - lr 0.0010 - time 357.45s
|
216 |
+
2024-07-29 12:59:13,737 ----------------------------------------------------------------------------------------------------
|
217 |
+
2024-07-29 12:59:13,738 EPOCH 10 DONE
|
218 |
+
2024-07-29 12:59:22,954 TRAIN Loss: 3.1375
|
219 |
+
2024-07-29 12:59:22,954 DEV Loss: 5.3597
|
220 |
+
2024-07-29 12:59:22,954 DEV Perplexity: 212.6584
|
221 |
+
2024-07-29 12:59:22,954 No improvement for 4 epoch(s)
|
222 |
+
2024-07-29 12:59:22,954 ----------------------------------------------------------------------------------------------------
|
223 |
+
2024-07-29 12:59:22,954 Finished Training
|
224 |
+
2024-07-29 12:59:40,826 TEST Perplexity: 203.8598
|
225 |
+
2024-07-29 13:09:51,573 TEST BLEU = 25.73 74.7/56.1/22.4/5.2 (BP = 0.975 ratio = 0.975 hyp_len = 158 ref_len = 162)
|
models/de2en/word_word2vec_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a354fa1d2c792fed869b5ce90924041000829cd20cbc573d25df6681f605641
|
3 |
+
size 90482856
|
models/de2en/word_word2vec_embeddings_without_attention/log.txt
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 13:10:01,959 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 13:10:01,959 Training Model
|
3 |
+
2024-07-29 13:10:01,959 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 13:10:01,960 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(22834, 300, padding_idx=22829)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(14303, 300, padding_idx=14298)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 1024, batch_first=True)
|
14 |
+
(hidden2vocab): Linear(in_features=1024, out_features=14303, bias=True)
|
15 |
+
(log_softmax): LogSoftmax(dim=-1)
|
16 |
+
)
|
17 |
+
)
|
18 |
+
2024-07-29 13:10:01,960 ----------------------------------------------------------------------------------------------------
|
19 |
+
2024-07-29 13:10:01,960 Training Hyperparameters:
|
20 |
+
2024-07-29 13:10:01,960 - max_epochs: 10
|
21 |
+
2024-07-29 13:10:01,960 - learning_rate: 0.001
|
22 |
+
2024-07-29 13:10:01,960 - batch_size: 128
|
23 |
+
2024-07-29 13:10:01,960 - patience: 5
|
24 |
+
2024-07-29 13:10:01,960 - scheduler_patience: 3
|
25 |
+
2024-07-29 13:10:01,960 - teacher_forcing_ratio: 0.5
|
26 |
+
2024-07-29 13:10:01,960 ----------------------------------------------------------------------------------------------------
|
27 |
+
2024-07-29 13:10:01,960 Computational Parameters:
|
28 |
+
2024-07-29 13:10:01,960 - num_workers: 4
|
29 |
+
2024-07-29 13:10:01,960 - device: device(type='cuda', index=0)
|
30 |
+
2024-07-29 13:10:01,960 ----------------------------------------------------------------------------------------------------
|
31 |
+
2024-07-29 13:10:01,960 Dataset Splits:
|
32 |
+
2024-07-29 13:10:01,960 - train: 133623 data points
|
33 |
+
2024-07-29 13:10:01,960 - dev: 19090 data points
|
34 |
+
2024-07-29 13:10:01,960 - test: 38179 data points
|
35 |
+
2024-07-29 13:10:01,960 ----------------------------------------------------------------------------------------------------
|
36 |
+
2024-07-29 13:10:01,960 EPOCH 1
|
37 |
+
2024-07-29 13:10:38,691 batch 104/1044 - loss 6.09540410 - lr 0.0010 - time 36.73s
|
38 |
+
2024-07-29 13:11:14,718 batch 208/1044 - loss 5.78919055 - lr 0.0010 - time 72.76s
|
39 |
+
2024-07-29 13:11:50,814 batch 312/1044 - loss 5.61710594 - lr 0.0010 - time 108.85s
|
40 |
+
2024-07-29 13:12:26,499 batch 416/1044 - loss 5.49340716 - lr 0.0010 - time 144.54s
|
41 |
+
2024-07-29 13:13:06,387 batch 520/1044 - loss 5.40083544 - lr 0.0010 - time 184.43s
|
42 |
+
2024-07-29 13:13:41,761 batch 624/1044 - loss 5.31845674 - lr 0.0010 - time 219.80s
|
43 |
+
2024-07-29 13:14:18,712 batch 728/1044 - loss 5.24921891 - lr 0.0010 - time 256.75s
|
44 |
+
2024-07-29 13:14:55,607 batch 832/1044 - loss 5.19052368 - lr 0.0010 - time 293.65s
|
45 |
+
2024-07-29 13:15:29,260 batch 936/1044 - loss 5.13820067 - lr 0.0010 - time 327.30s
|
46 |
+
2024-07-29 13:16:05,178 batch 1040/1044 - loss 5.09115850 - lr 0.0010 - time 363.22s
|
47 |
+
2024-07-29 13:16:06,474 ----------------------------------------------------------------------------------------------------
|
48 |
+
2024-07-29 13:16:06,475 EPOCH 1 DONE
|
49 |
+
2024-07-29 13:16:16,857 TRAIN Loss: 5.0892
|
50 |
+
2024-07-29 13:16:16,857 DEV Loss: 5.4825
|
51 |
+
2024-07-29 13:16:16,857 DEV Perplexity: 240.4453
|
52 |
+
2024-07-29 13:16:16,857 New best score!
|
53 |
+
2024-07-29 13:16:16,858 ----------------------------------------------------------------------------------------------------
|
54 |
+
2024-07-29 13:16:16,858 EPOCH 2
|
55 |
+
2024-07-29 13:16:51,292 batch 104/1044 - loss 4.54567204 - lr 0.0010 - time 34.43s
|
56 |
+
2024-07-29 13:17:27,399 batch 208/1044 - loss 4.54911235 - lr 0.0010 - time 70.54s
|
57 |
+
2024-07-29 13:18:04,634 batch 312/1044 - loss 4.54159739 - lr 0.0010 - time 107.78s
|
58 |
+
2024-07-29 13:18:42,927 batch 416/1044 - loss 4.52325806 - lr 0.0010 - time 146.07s
|
59 |
+
2024-07-29 13:19:16,844 batch 520/1044 - loss 4.51451812 - lr 0.0010 - time 179.99s
|
60 |
+
2024-07-29 13:19:53,415 batch 624/1044 - loss 4.50047546 - lr 0.0010 - time 216.56s
|
61 |
+
2024-07-29 13:20:28,251 batch 728/1044 - loss 4.48981222 - lr 0.0010 - time 251.39s
|
62 |
+
2024-07-29 13:21:04,925 batch 832/1044 - loss 4.47670500 - lr 0.0010 - time 288.07s
|
63 |
+
2024-07-29 13:21:43,623 batch 936/1044 - loss 4.46603177 - lr 0.0010 - time 326.76s
|
64 |
+
2024-07-29 13:22:20,836 batch 1040/1044 - loss 4.45305521 - lr 0.0010 - time 363.98s
|
65 |
+
2024-07-29 13:22:22,582 ----------------------------------------------------------------------------------------------------
|
66 |
+
2024-07-29 13:22:22,583 EPOCH 2 DONE
|
67 |
+
2024-07-29 13:22:33,063 TRAIN Loss: 4.4525
|
68 |
+
2024-07-29 13:22:33,063 DEV Loss: 5.3427
|
69 |
+
2024-07-29 13:22:33,063 DEV Perplexity: 209.0671
|
70 |
+
2024-07-29 13:22:33,063 New best score!
|
71 |
+
2024-07-29 13:22:33,064 ----------------------------------------------------------------------------------------------------
|
72 |
+
2024-07-29 13:22:33,064 EPOCH 3
|
73 |
+
2024-07-29 13:23:06,659 batch 104/1044 - loss 4.21869541 - lr 0.0010 - time 33.59s
|
74 |
+
2024-07-29 13:23:46,121 batch 208/1044 - loss 4.23292076 - lr 0.0010 - time 73.06s
|
75 |
+
2024-07-29 13:24:23,549 batch 312/1044 - loss 4.22644233 - lr 0.0010 - time 110.48s
|
76 |
+
2024-07-29 13:24:59,508 batch 416/1044 - loss 4.22344791 - lr 0.0010 - time 146.44s
|
77 |
+
2024-07-29 13:25:35,014 batch 520/1044 - loss 4.22421972 - lr 0.0010 - time 181.95s
|
78 |
+
2024-07-29 13:26:10,928 batch 624/1044 - loss 4.22022673 - lr 0.0010 - time 217.86s
|
79 |
+
2024-07-29 13:26:47,876 batch 728/1044 - loss 4.21593372 - lr 0.0010 - time 254.81s
|
80 |
+
2024-07-29 13:27:26,289 batch 832/1044 - loss 4.21460853 - lr 0.0010 - time 293.22s
|
81 |
+
2024-07-29 13:28:05,065 batch 936/1044 - loss 4.20808099 - lr 0.0010 - time 332.00s
|
82 |
+
2024-07-29 13:28:39,636 batch 1040/1044 - loss 4.20587891 - lr 0.0010 - time 366.57s
|
83 |
+
2024-07-29 13:28:41,000 ----------------------------------------------------------------------------------------------------
|
84 |
+
2024-07-29 13:28:41,001 EPOCH 3 DONE
|
85 |
+
2024-07-29 13:28:51,460 TRAIN Loss: 4.2055
|
86 |
+
2024-07-29 13:28:51,460 DEV Loss: 5.2903
|
87 |
+
2024-07-29 13:28:51,460 DEV Perplexity: 198.4051
|
88 |
+
2024-07-29 13:28:51,460 New best score!
|
89 |
+
2024-07-29 13:28:51,461 ----------------------------------------------------------------------------------------------------
|
90 |
+
2024-07-29 13:28:51,461 EPOCH 4
|
91 |
+
2024-07-29 13:29:24,846 batch 104/1044 - loss 4.02041008 - lr 0.0010 - time 33.38s
|
92 |
+
2024-07-29 13:30:01,477 batch 208/1044 - loss 4.02628114 - lr 0.0010 - time 70.02s
|
93 |
+
2024-07-29 13:30:36,100 batch 312/1044 - loss 4.03258949 - lr 0.0010 - time 104.64s
|
94 |
+
2024-07-29 13:31:12,440 batch 416/1044 - loss 4.03809558 - lr 0.0010 - time 140.98s
|
95 |
+
2024-07-29 13:31:49,714 batch 520/1044 - loss 4.03893923 - lr 0.0010 - time 178.25s
|
96 |
+
2024-07-29 13:32:27,353 batch 624/1044 - loss 4.03847505 - lr 0.0010 - time 215.89s
|
97 |
+
2024-07-29 13:33:05,109 batch 728/1044 - loss 4.03748313 - lr 0.0010 - time 253.65s
|
98 |
+
2024-07-29 13:33:43,996 batch 832/1044 - loss 4.03620893 - lr 0.0010 - time 292.53s
|
99 |
+
2024-07-29 13:34:21,101 batch 936/1044 - loss 4.03685949 - lr 0.0010 - time 329.64s
|
100 |
+
2024-07-29 13:34:58,407 batch 1040/1044 - loss 4.03850191 - lr 0.0010 - time 366.95s
|
101 |
+
2024-07-29 13:34:59,732 ----------------------------------------------------------------------------------------------------
|
102 |
+
2024-07-29 13:34:59,733 EPOCH 4 DONE
|
103 |
+
2024-07-29 13:35:10,035 TRAIN Loss: 4.0381
|
104 |
+
2024-07-29 13:35:10,036 DEV Loss: 5.2363
|
105 |
+
2024-07-29 13:35:10,036 DEV Perplexity: 187.9764
|
106 |
+
2024-07-29 13:35:10,036 New best score!
|
107 |
+
2024-07-29 13:35:10,037 ----------------------------------------------------------------------------------------------------
|
108 |
+
2024-07-29 13:35:10,037 EPOCH 5
|
109 |
+
2024-07-29 13:35:45,922 batch 104/1044 - loss 3.91698917 - lr 0.0010 - time 35.89s
|
110 |
+
2024-07-29 13:36:23,367 batch 208/1044 - loss 3.91381397 - lr 0.0010 - time 73.33s
|
111 |
+
2024-07-29 13:36:58,024 batch 312/1044 - loss 3.91127859 - lr 0.0010 - time 107.99s
|
112 |
+
2024-07-29 13:37:34,733 batch 416/1044 - loss 3.91103300 - lr 0.0010 - time 144.70s
|
113 |
+
2024-07-29 13:38:11,550 batch 520/1044 - loss 3.91122542 - lr 0.0010 - time 181.51s
|
114 |
+
2024-07-29 13:38:47,704 batch 624/1044 - loss 3.91083060 - lr 0.0010 - time 217.67s
|
115 |
+
2024-07-29 13:39:24,810 batch 728/1044 - loss 3.91623834 - lr 0.0010 - time 254.77s
|
116 |
+
2024-07-29 13:40:03,413 batch 832/1044 - loss 3.91736075 - lr 0.0010 - time 293.38s
|
117 |
+
2024-07-29 13:40:40,364 batch 936/1044 - loss 3.91753032 - lr 0.0010 - time 330.33s
|
118 |
+
2024-07-29 13:41:15,969 batch 1040/1044 - loss 3.91917481 - lr 0.0010 - time 365.93s
|
119 |
+
2024-07-29 13:41:17,132 ----------------------------------------------------------------------------------------------------
|
120 |
+
2024-07-29 13:41:17,133 EPOCH 5 DONE
|
121 |
+
2024-07-29 13:41:27,459 TRAIN Loss: 3.9196
|
122 |
+
2024-07-29 13:41:27,459 DEV Loss: 5.2010
|
123 |
+
2024-07-29 13:41:27,459 DEV Perplexity: 181.4572
|
124 |
+
2024-07-29 13:41:27,459 New best score!
|
125 |
+
2024-07-29 13:41:27,460 ----------------------------------------------------------------------------------------------------
|
126 |
+
2024-07-29 13:41:27,460 EPOCH 6
|
127 |
+
2024-07-29 13:42:04,286 batch 104/1044 - loss 3.78032496 - lr 0.0010 - time 36.83s
|
128 |
+
2024-07-29 13:42:37,961 batch 208/1044 - loss 3.78429960 - lr 0.0010 - time 70.50s
|
129 |
+
2024-07-29 13:43:13,155 batch 312/1044 - loss 3.78731439 - lr 0.0010 - time 105.69s
|
130 |
+
2024-07-29 13:43:52,863 batch 416/1044 - loss 3.79086867 - lr 0.0010 - time 145.40s
|
131 |
+
2024-07-29 13:44:28,077 batch 520/1044 - loss 3.79898581 - lr 0.0010 - time 180.62s
|
132 |
+
2024-07-29 13:45:05,088 batch 624/1044 - loss 3.80209827 - lr 0.0010 - time 217.63s
|
133 |
+
2024-07-29 13:45:43,452 batch 728/1044 - loss 3.80721706 - lr 0.0010 - time 255.99s
|
134 |
+
2024-07-29 13:46:18,399 batch 832/1044 - loss 3.80967595 - lr 0.0010 - time 290.94s
|
135 |
+
2024-07-29 13:46:54,816 batch 936/1044 - loss 3.81078050 - lr 0.0010 - time 327.36s
|
136 |
+
2024-07-29 13:47:30,169 batch 1040/1044 - loss 3.81289031 - lr 0.0010 - time 362.71s
|
137 |
+
2024-07-29 13:47:31,788 ----------------------------------------------------------------------------------------------------
|
138 |
+
2024-07-29 13:47:31,789 EPOCH 6 DONE
|
139 |
+
2024-07-29 13:47:42,401 TRAIN Loss: 3.8139
|
140 |
+
2024-07-29 13:47:42,402 DEV Loss: 5.2510
|
141 |
+
2024-07-29 13:47:42,402 DEV Perplexity: 190.7481
|
142 |
+
2024-07-29 13:47:42,402 No improvement for 1 epoch(s)
|
143 |
+
2024-07-29 13:47:42,402 ----------------------------------------------------------------------------------------------------
|
144 |
+
2024-07-29 13:47:42,402 EPOCH 7
|
145 |
+
2024-07-29 13:48:20,393 batch 104/1044 - loss 3.64564601 - lr 0.0010 - time 37.99s
|
146 |
+
2024-07-29 13:48:59,218 batch 208/1044 - loss 3.67913320 - lr 0.0010 - time 76.82s
|
147 |
+
2024-07-29 13:49:36,078 batch 312/1044 - loss 3.68023382 - lr 0.0010 - time 113.68s
|
148 |
+
2024-07-29 13:50:11,575 batch 416/1044 - loss 3.69422242 - lr 0.0010 - time 149.17s
|
149 |
+
2024-07-29 13:50:45,973 batch 520/1044 - loss 3.70225501 - lr 0.0010 - time 183.57s
|
150 |
+
2024-07-29 13:51:20,714 batch 624/1044 - loss 3.71231662 - lr 0.0010 - time 218.31s
|
151 |
+
2024-07-29 13:51:57,642 batch 728/1044 - loss 3.72161044 - lr 0.0010 - time 255.24s
|
152 |
+
2024-07-29 13:52:32,926 batch 832/1044 - loss 3.72604369 - lr 0.0010 - time 290.52s
|
153 |
+
2024-07-29 13:53:07,846 batch 936/1044 - loss 3.72842573 - lr 0.0010 - time 325.44s
|
154 |
+
2024-07-29 13:53:46,824 batch 1040/1044 - loss 3.73105854 - lr 0.0010 - time 364.42s
|
155 |
+
2024-07-29 13:53:48,019 ----------------------------------------------------------------------------------------------------
|
156 |
+
2024-07-29 13:53:48,020 EPOCH 7 DONE
|
157 |
+
2024-07-29 13:53:58,450 TRAIN Loss: 3.7312
|
158 |
+
2024-07-29 13:53:58,450 DEV Loss: 5.3064
|
159 |
+
2024-07-29 13:53:58,450 DEV Perplexity: 201.6147
|
160 |
+
2024-07-29 13:53:58,450 No improvement for 2 epoch(s)
|
161 |
+
2024-07-29 13:53:58,450 ----------------------------------------------------------------------------------------------------
|
162 |
+
2024-07-29 13:53:58,450 EPOCH 8
|
163 |
+
2024-07-29 13:54:36,926 batch 104/1044 - loss 3.59860957 - lr 0.0010 - time 38.48s
|
164 |
+
2024-07-29 13:55:14,756 batch 208/1044 - loss 3.60496652 - lr 0.0010 - time 76.31s
|
165 |
+
2024-07-29 13:55:50,534 batch 312/1044 - loss 3.61502466 - lr 0.0010 - time 112.08s
|
166 |
+
2024-07-29 13:56:26,494 batch 416/1044 - loss 3.61783856 - lr 0.0010 - time 148.04s
|
167 |
+
2024-07-29 13:57:02,451 batch 520/1044 - loss 3.62817164 - lr 0.0010 - time 184.00s
|
168 |
+
2024-07-29 13:57:38,362 batch 624/1044 - loss 3.63620102 - lr 0.0010 - time 219.91s
|
169 |
+
2024-07-29 13:58:14,886 batch 728/1044 - loss 3.64191881 - lr 0.0010 - time 256.44s
|
170 |
+
2024-07-29 13:58:52,636 batch 832/1044 - loss 3.64611779 - lr 0.0010 - time 294.19s
|
171 |
+
2024-07-29 13:59:28,400 batch 936/1044 - loss 3.64988911 - lr 0.0010 - time 329.95s
|
172 |
+
2024-07-29 14:00:04,604 batch 1040/1044 - loss 3.65293422 - lr 0.0010 - time 366.15s
|
173 |
+
2024-07-29 14:00:05,905 ----------------------------------------------------------------------------------------------------
|
174 |
+
2024-07-29 14:00:05,906 EPOCH 8 DONE
|
175 |
+
2024-07-29 14:00:16,297 TRAIN Loss: 3.6527
|
176 |
+
2024-07-29 14:00:16,298 DEV Loss: 5.2927
|
177 |
+
2024-07-29 14:00:16,298 DEV Perplexity: 198.8844
|
178 |
+
2024-07-29 14:00:16,298 No improvement for 3 epoch(s)
|
179 |
+
2024-07-29 14:00:16,298 ----------------------------------------------------------------------------------------------------
|
180 |
+
2024-07-29 14:00:16,298 EPOCH 9
|
181 |
+
2024-07-29 14:00:52,567 batch 104/1044 - loss 3.52726772 - lr 0.0010 - time 36.27s
|
182 |
+
2024-07-29 14:01:29,328 batch 208/1044 - loss 3.53966258 - lr 0.0010 - time 73.03s
|
183 |
+
2024-07-29 14:02:03,698 batch 312/1044 - loss 3.54471479 - lr 0.0010 - time 107.40s
|
184 |
+
2024-07-29 14:02:42,288 batch 416/1044 - loss 3.55352108 - lr 0.0010 - time 145.99s
|
185 |
+
2024-07-29 14:03:17,708 batch 520/1044 - loss 3.56031944 - lr 0.0010 - time 181.41s
|
186 |
+
2024-07-29 14:03:54,432 batch 624/1044 - loss 3.56560187 - lr 0.0010 - time 218.13s
|
187 |
+
2024-07-29 14:04:29,029 batch 728/1044 - loss 3.57430677 - lr 0.0010 - time 252.73s
|
188 |
+
2024-07-29 14:05:07,881 batch 832/1044 - loss 3.58089666 - lr 0.0010 - time 291.58s
|
189 |
+
2024-07-29 14:05:47,328 batch 936/1044 - loss 3.58696306 - lr 0.0010 - time 331.03s
|
190 |
+
2024-07-29 14:06:22,763 batch 1040/1044 - loss 3.58961468 - lr 0.0010 - time 366.47s
|
191 |
+
2024-07-29 14:06:24,360 ----------------------------------------------------------------------------------------------------
|
192 |
+
2024-07-29 14:06:24,361 EPOCH 9 DONE
|
193 |
+
2024-07-29 14:06:34,769 TRAIN Loss: 3.5901
|
194 |
+
2024-07-29 14:06:34,769 DEV Loss: 5.3270
|
195 |
+
2024-07-29 14:06:34,769 DEV Perplexity: 205.8242
|
196 |
+
2024-07-29 14:06:34,769 No improvement for 4 epoch(s)
|
197 |
+
2024-07-29 14:06:34,769 ----------------------------------------------------------------------------------------------------
|
198 |
+
2024-07-29 14:06:34,769 EPOCH 10
|
199 |
+
2024-07-29 14:07:13,321 batch 104/1044 - loss 3.42520954 - lr 0.0001 - time 38.55s
|
200 |
+
2024-07-29 14:07:49,862 batch 208/1044 - loss 3.40331735 - lr 0.0001 - time 75.09s
|
201 |
+
2024-07-29 14:08:23,810 batch 312/1044 - loss 3.40658811 - lr 0.0001 - time 109.04s
|
202 |
+
2024-07-29 14:08:58,653 batch 416/1044 - loss 3.39734012 - lr 0.0001 - time 143.88s
|
203 |
+
2024-07-29 14:09:36,466 batch 520/1044 - loss 3.39226825 - lr 0.0001 - time 181.70s
|
204 |
+
2024-07-29 14:10:15,012 batch 624/1044 - loss 3.39046824 - lr 0.0001 - time 220.24s
|
205 |
+
2024-07-29 14:10:52,050 batch 728/1044 - loss 3.39249443 - lr 0.0001 - time 257.28s
|
206 |
+
2024-07-29 14:11:27,660 batch 832/1044 - loss 3.39002826 - lr 0.0001 - time 292.89s
|
207 |
+
2024-07-29 14:12:05,743 batch 936/1044 - loss 3.38926826 - lr 0.0001 - time 330.97s
|
208 |
+
2024-07-29 14:12:42,871 batch 1040/1044 - loss 3.39050238 - lr 0.0001 - time 368.10s
|
209 |
+
2024-07-29 14:12:44,193 ----------------------------------------------------------------------------------------------------
|
210 |
+
2024-07-29 14:12:44,194 EPOCH 10 DONE
|
211 |
+
2024-07-29 14:12:54,893 TRAIN Loss: 3.3906
|
212 |
+
2024-07-29 14:12:54,894 DEV Loss: 5.3173
|
213 |
+
2024-07-29 14:12:54,894 DEV Perplexity: 203.8274
|
214 |
+
2024-07-29 14:12:54,894 No improvement for 5 epoch(s)
|
215 |
+
2024-07-29 14:12:54,894 Patience reached: Terminating model training due to early stopping
|
216 |
+
2024-07-29 14:12:54,894 ----------------------------------------------------------------------------------------------------
|
217 |
+
2024-07-29 14:12:54,894 Finished Training
|
218 |
+
2024-07-29 14:13:15,035 TEST Perplexity: 181.8180
|
219 |
+
2024-07-29 14:16:12,477 TEST BLEU = 32.60 91.6/63.4/25.9/7.5 (BP = 1.000 ratio = 1.000 hyp_len = 83 ref_len = 83)
|
models/de2en/word_word2vec_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47fa6f98adbb53e706ca80b9961a83af80e2c92ea3181073053dbb6b16528aa9
|
3 |
+
size 139401140
|
models/en2de/character_end2end_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 04:43:06,934 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 04:43:06,934 Training Model
|
3 |
+
2024-07-29 04:43:06,934 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 04:43:06,934 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(114, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(112, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=112, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-29 04:43:06,934 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-29 04:43:06,934 Training Hyperparameters:
|
27 |
+
2024-07-29 04:43:06,934 - max_epochs: 10
|
28 |
+
2024-07-29 04:43:06,934 - learning_rate: 0.001
|
29 |
+
2024-07-29 04:43:06,934 - batch_size: 128
|
30 |
+
2024-07-29 04:43:06,934 - patience: 5
|
31 |
+
2024-07-29 04:43:06,934 - scheduler_patience: 3
|
32 |
+
2024-07-29 04:43:06,934 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-29 04:43:06,934 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-29 04:43:06,934 Computational Parameters:
|
35 |
+
2024-07-29 04:43:06,934 - num_workers: 4
|
36 |
+
2024-07-29 04:43:06,934 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-29 04:43:06,934 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-29 04:43:06,934 Dataset Splits:
|
39 |
+
2024-07-29 04:43:06,934 - train: 133623 data points
|
40 |
+
2024-07-29 04:43:06,934 - dev: 19090 data points
|
41 |
+
2024-07-29 04:43:06,934 - test: 38179 data points
|
42 |
+
2024-07-29 04:43:06,935 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-29 04:43:06,935 EPOCH 1
|
44 |
+
2024-07-29 04:46:03,502 batch 104/1044 - loss 2.83783023 - lr 0.0010 - time 176.57s
|
45 |
+
2024-07-29 04:48:58,358 batch 208/1044 - loss 2.67827428 - lr 0.0010 - time 351.42s
|
46 |
+
2024-07-29 04:52:09,047 batch 312/1044 - loss 2.59119082 - lr 0.0010 - time 542.11s
|
47 |
+
2024-07-29 04:55:23,591 batch 416/1044 - loss 2.52991555 - lr 0.0010 - time 736.66s
|
48 |
+
2024-07-29 04:58:24,345 batch 520/1044 - loss 2.48547669 - lr 0.0010 - time 917.41s
|
49 |
+
2024-07-29 05:01:11,473 batch 624/1044 - loss 2.44637715 - lr 0.0010 - time 1084.54s
|
50 |
+
2024-07-29 05:04:20,046 batch 728/1044 - loss 2.41217192 - lr 0.0010 - time 1273.11s
|
51 |
+
2024-07-29 05:07:28,110 batch 832/1044 - loss 2.37809223 - lr 0.0010 - time 1461.18s
|
52 |
+
2024-07-29 05:10:38,372 batch 936/1044 - loss 2.34602575 - lr 0.0010 - time 1651.44s
|
53 |
+
2024-07-29 05:13:32,549 batch 1040/1044 - loss 2.31563680 - lr 0.0010 - time 1825.61s
|
54 |
+
2024-07-29 05:13:39,106 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-29 05:13:39,108 EPOCH 1 DONE
|
56 |
+
2024-07-29 05:14:26,303 TRAIN Loss: 2.3144
|
57 |
+
2024-07-29 05:14:26,303 DEV Loss: 3.5700
|
58 |
+
2024-07-29 05:14:26,303 DEV Perplexity: 35.5166
|
59 |
+
2024-07-29 05:14:26,303 New best score!
|
60 |
+
2024-07-29 05:14:26,305 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-29 05:14:26,305 EPOCH 2
|
62 |
+
2024-07-29 05:17:25,271 batch 104/1044 - loss 2.02556723 - lr 0.0010 - time 178.97s
|
63 |
+
2024-07-29 05:20:25,054 batch 208/1044 - loss 2.00942771 - lr 0.0010 - time 358.75s
|
64 |
+
2024-07-29 05:23:12,883 batch 312/1044 - loss 1.99176520 - lr 0.0010 - time 526.58s
|
65 |
+
2024-07-29 05:26:08,804 batch 416/1044 - loss 1.97854575 - lr 0.0010 - time 702.50s
|
66 |
+
2024-07-29 05:29:14,936 batch 520/1044 - loss 1.97086978 - lr 0.0010 - time 888.63s
|
67 |
+
2024-07-29 05:32:21,237 batch 624/1044 - loss 1.95995870 - lr 0.0010 - time 1074.93s
|
68 |
+
2024-07-29 05:35:20,854 batch 728/1044 - loss 1.95067503 - lr 0.0010 - time 1254.55s
|
69 |
+
2024-07-29 05:38:34,956 batch 832/1044 - loss 1.94326082 - lr 0.0010 - time 1448.65s
|
70 |
+
2024-07-29 05:41:48,006 batch 936/1044 - loss 1.93362772 - lr 0.0010 - time 1641.70s
|
71 |
+
2024-07-29 05:44:42,067 batch 1040/1044 - loss 1.92524348 - lr 0.0010 - time 1815.76s
|
72 |
+
2024-07-29 05:44:50,207 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-29 05:44:50,210 EPOCH 2 DONE
|
74 |
+
2024-07-29 05:45:37,466 TRAIN Loss: 1.9249
|
75 |
+
2024-07-29 05:45:37,466 DEV Loss: 3.8374
|
76 |
+
2024-07-29 05:45:37,466 DEV Perplexity: 46.4067
|
77 |
+
2024-07-29 05:45:37,466 No improvement for 1 epoch(s)
|
78 |
+
2024-07-29 05:45:37,466 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-29 05:45:37,466 EPOCH 3
|
80 |
+
2024-07-29 05:48:43,560 batch 104/1044 - loss 1.82380688 - lr 0.0010 - time 186.09s
|
81 |
+
2024-07-29 05:51:53,714 batch 208/1044 - loss 1.82825828 - lr 0.0010 - time 376.25s
|
82 |
+
2024-07-29 05:55:08,715 batch 312/1044 - loss 1.82657076 - lr 0.0010 - time 571.25s
|
83 |
+
2024-07-29 05:58:07,203 batch 416/1044 - loss 1.82265144 - lr 0.0010 - time 749.74s
|
84 |
+
2024-07-29 06:00:58,968 batch 520/1044 - loss 1.81858461 - lr 0.0010 - time 921.50s
|
85 |
+
2024-07-29 06:03:59,822 batch 624/1044 - loss 1.80977892 - lr 0.0010 - time 1102.36s
|
86 |
+
2024-07-29 06:07:08,066 batch 728/1044 - loss 1.80312389 - lr 0.0010 - time 1290.60s
|
87 |
+
2024-07-29 06:10:01,948 batch 832/1044 - loss 1.79834272 - lr 0.0010 - time 1464.48s
|
88 |
+
2024-07-29 06:12:49,654 batch 936/1044 - loss 1.79244394 - lr 0.0010 - time 1632.19s
|
89 |
+
2024-07-29 06:15:41,378 batch 1040/1044 - loss 1.78895096 - lr 0.0010 - time 1803.91s
|
90 |
+
2024-07-29 06:15:47,180 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-29 06:15:47,183 EPOCH 3 DONE
|
92 |
+
2024-07-29 06:16:34,306 TRAIN Loss: 1.7889
|
93 |
+
2024-07-29 06:16:34,306 DEV Loss: 3.8489
|
94 |
+
2024-07-29 06:16:34,306 DEV Perplexity: 46.9422
|
95 |
+
2024-07-29 06:16:34,307 No improvement for 2 epoch(s)
|
96 |
+
2024-07-29 06:16:34,307 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-29 06:16:34,307 EPOCH 4
|
98 |
+
2024-07-29 06:19:47,695 batch 104/1044 - loss 1.72615880 - lr 0.0010 - time 193.39s
|
99 |
+
2024-07-29 06:22:47,789 batch 208/1044 - loss 1.72849645 - lr 0.0010 - time 373.48s
|
100 |
+
2024-07-29 06:25:49,316 batch 312/1044 - loss 1.72645533 - lr 0.0010 - time 555.01s
|
101 |
+
2024-07-29 06:28:43,932 batch 416/1044 - loss 1.72066385 - lr 0.0010 - time 729.63s
|
102 |
+
2024-07-29 06:31:56,479 batch 520/1044 - loss 1.71717779 - lr 0.0010 - time 922.17s
|
103 |
+
2024-07-29 06:34:57,754 batch 624/1044 - loss 1.71594436 - lr 0.0010 - time 1103.45s
|
104 |
+
2024-07-29 06:37:51,089 batch 728/1044 - loss 1.71165972 - lr 0.0010 - time 1276.78s
|
105 |
+
2024-07-29 06:40:52,402 batch 832/1044 - loss 1.70951752 - lr 0.0010 - time 1458.10s
|
106 |
+
2024-07-29 06:43:46,624 batch 936/1044 - loss 1.70553106 - lr 0.0010 - time 1632.32s
|
107 |
+
2024-07-29 06:46:41,386 batch 1040/1044 - loss 1.70329877 - lr 0.0010 - time 1807.08s
|
108 |
+
2024-07-29 06:46:48,093 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-29 06:46:48,095 EPOCH 4 DONE
|
110 |
+
2024-07-29 06:47:35,218 TRAIN Loss: 1.7032
|
111 |
+
2024-07-29 06:47:35,219 DEV Loss: 4.1957
|
112 |
+
2024-07-29 06:47:35,219 DEV Perplexity: 66.3981
|
113 |
+
2024-07-29 06:47:35,219 No improvement for 3 epoch(s)
|
114 |
+
2024-07-29 06:47:35,219 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-29 06:47:35,219 EPOCH 5
|
116 |
+
2024-07-29 06:50:45,524 batch 104/1044 - loss 1.64844567 - lr 0.0010 - time 190.31s
|
117 |
+
2024-07-29 06:53:48,606 batch 208/1044 - loss 1.64985944 - lr 0.0010 - time 373.39s
|
118 |
+
2024-07-29 06:56:52,667 batch 312/1044 - loss 1.65055201 - lr 0.0010 - time 557.45s
|
119 |
+
2024-07-29 06:59:51,714 batch 416/1044 - loss 1.65345511 - lr 0.0010 - time 736.50s
|
120 |
+
2024-07-29 07:02:52,445 batch 520/1044 - loss 1.65111495 - lr 0.0010 - time 917.23s
|
121 |
+
2024-07-29 07:06:00,096 batch 624/1044 - loss 1.65081866 - lr 0.0010 - time 1104.88s
|
122 |
+
2024-07-29 07:09:16,066 batch 728/1044 - loss 1.64957887 - lr 0.0010 - time 1300.85s
|
123 |
+
2024-07-29 07:12:15,087 batch 832/1044 - loss 1.64832800 - lr 0.0010 - time 1479.87s
|
124 |
+
2024-07-29 07:15:10,030 batch 936/1044 - loss 1.64612010 - lr 0.0010 - time 1654.81s
|
125 |
+
2024-07-29 07:18:02,140 batch 1040/1044 - loss 1.64496474 - lr 0.0010 - time 1826.92s
|
126 |
+
2024-07-29 07:18:08,591 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-29 07:18:08,594 EPOCH 5 DONE
|
128 |
+
2024-07-29 07:18:55,835 TRAIN Loss: 1.6448
|
129 |
+
2024-07-29 07:18:55,835 DEV Loss: 4.0923
|
130 |
+
2024-07-29 07:18:55,835 DEV Perplexity: 59.8790
|
131 |
+
2024-07-29 07:18:55,835 No improvement for 4 epoch(s)
|
132 |
+
2024-07-29 07:18:55,835 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-29 07:18:55,835 EPOCH 6
|
134 |
+
2024-07-29 07:21:53,160 batch 104/1044 - loss 1.58821843 - lr 0.0001 - time 177.32s
|
135 |
+
2024-07-29 07:24:44,349 batch 208/1044 - loss 1.59108787 - lr 0.0001 - time 348.51s
|
136 |
+
2024-07-29 07:27:37,622 batch 312/1044 - loss 1.58441215 - lr 0.0001 - time 521.79s
|
137 |
+
2024-07-29 07:30:43,750 batch 416/1044 - loss 1.58090937 - lr 0.0001 - time 707.91s
|
138 |
+
2024-07-29 07:33:54,621 batch 520/1044 - loss 1.58090223 - lr 0.0001 - time 898.79s
|
139 |
+
2024-07-29 07:36:52,832 batch 624/1044 - loss 1.58009594 - lr 0.0001 - time 1077.00s
|
140 |
+
2024-07-29 07:40:09,071 batch 728/1044 - loss 1.57836947 - lr 0.0001 - time 1273.24s
|
141 |
+
2024-07-29 07:43:11,085 batch 832/1044 - loss 1.57711583 - lr 0.0001 - time 1455.25s
|
142 |
+
2024-07-29 07:46:18,514 batch 936/1044 - loss 1.57624354 - lr 0.0001 - time 1642.68s
|
143 |
+
2024-07-29 07:49:05,093 batch 1040/1044 - loss 1.57536047 - lr 0.0001 - time 1809.26s
|
144 |
+
2024-07-29 07:49:11,696 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-29 07:49:11,699 EPOCH 6 DONE
|
146 |
+
2024-07-29 07:49:59,010 TRAIN Loss: 1.5752
|
147 |
+
2024-07-29 07:49:59,010 DEV Loss: 4.1991
|
148 |
+
2024-07-29 07:49:59,010 DEV Perplexity: 66.6274
|
149 |
+
2024-07-29 07:49:59,010 No improvement for 5 epoch(s)
|
150 |
+
2024-07-29 07:49:59,010 Patience reached: Terminating model training due to early stopping
|
151 |
+
2024-07-29 07:49:59,010 ----------------------------------------------------------------------------------------------------
|
152 |
+
2024-07-29 07:49:59,010 Finished Training
|
153 |
+
2024-07-29 07:51:31,366 TEST Perplexity: 35.5327
|
154 |
+
2024-07-29 08:02:43,738 TEST BLEU = 4.47 45.6/8.8/2.0/0.5 (BP = 1.000 ratio = 1.000 hyp_len = 103 ref_len = 103)
|
models/en2de/character_end2end_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67f0a4b19898316afa15bfb102be43b49cbb6ca418a73c80039a2ff952d7f028
|
3 |
+
size 15945704
|
models/en2de/character_end2end_embeddings_without_attention/log.txt
ADDED
@@ -0,0 +1,147 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 08:02:54,900 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 08:02:54,900 Training Model
|
3 |
+
2024-07-29 08:02:54,900 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 08:02:54,900 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(114, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(112, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 1024, batch_first=True)
|
14 |
+
(hidden2vocab): Linear(in_features=1024, out_features=112, bias=True)
|
15 |
+
(log_softmax): LogSoftmax(dim=-1)
|
16 |
+
)
|
17 |
+
)
|
18 |
+
2024-07-29 08:02:54,900 ----------------------------------------------------------------------------------------------------
|
19 |
+
2024-07-29 08:02:54,900 Training Hyperparameters:
|
20 |
+
2024-07-29 08:02:54,900 - max_epochs: 10
|
21 |
+
2024-07-29 08:02:54,900 - learning_rate: 0.001
|
22 |
+
2024-07-29 08:02:54,900 - batch_size: 128
|
23 |
+
2024-07-29 08:02:54,900 - patience: 5
|
24 |
+
2024-07-29 08:02:54,900 - scheduler_patience: 3
|
25 |
+
2024-07-29 08:02:54,900 - teacher_forcing_ratio: 0.5
|
26 |
+
2024-07-29 08:02:54,900 ----------------------------------------------------------------------------------------------------
|
27 |
+
2024-07-29 08:02:54,900 Computational Parameters:
|
28 |
+
2024-07-29 08:02:54,900 - num_workers: 4
|
29 |
+
2024-07-29 08:02:54,900 - device: device(type='cuda', index=0)
|
30 |
+
2024-07-29 08:02:54,900 ----------------------------------------------------------------------------------------------------
|
31 |
+
2024-07-29 08:02:54,900 Dataset Splits:
|
32 |
+
2024-07-29 08:02:54,900 - train: 133623 data points
|
33 |
+
2024-07-29 08:02:54,900 - dev: 19090 data points
|
34 |
+
2024-07-29 08:02:54,900 - test: 38179 data points
|
35 |
+
2024-07-29 08:02:54,900 ----------------------------------------------------------------------------------------------------
|
36 |
+
2024-07-29 08:02:54,900 EPOCH 1
|
37 |
+
2024-07-29 08:03:53,216 batch 104/1044 - loss 2.73587249 - lr 0.0010 - time 58.32s
|
38 |
+
2024-07-29 08:04:49,000 batch 208/1044 - loss 2.60424047 - lr 0.0010 - time 114.10s
|
39 |
+
2024-07-29 08:05:41,466 batch 312/1044 - loss 2.53508188 - lr 0.0010 - time 166.57s
|
40 |
+
2024-07-29 08:06:40,078 batch 416/1044 - loss 2.48822718 - lr 0.0010 - time 225.18s
|
41 |
+
2024-07-29 08:07:39,192 batch 520/1044 - loss 2.45172488 - lr 0.0010 - time 284.29s
|
42 |
+
2024-07-29 08:08:34,794 batch 624/1044 - loss 2.42237450 - lr 0.0010 - time 339.89s
|
43 |
+
2024-07-29 08:09:31,742 batch 728/1044 - loss 2.39725696 - lr 0.0010 - time 396.84s
|
44 |
+
2024-07-29 08:10:29,289 batch 832/1044 - loss 2.37658248 - lr 0.0010 - time 454.39s
|
45 |
+
2024-07-29 08:11:26,901 batch 936/1044 - loss 2.35841719 - lr 0.0010 - time 512.00s
|
46 |
+
2024-07-29 08:12:23,384 batch 1040/1044 - loss 2.34286929 - lr 0.0010 - time 568.48s
|
47 |
+
2024-07-29 08:12:25,463 ----------------------------------------------------------------------------------------------------
|
48 |
+
2024-07-29 08:12:25,466 EPOCH 1 DONE
|
49 |
+
2024-07-29 08:12:53,761 TRAIN Loss: 2.3424
|
50 |
+
2024-07-29 08:12:53,761 DEV Loss: 3.0900
|
51 |
+
2024-07-29 08:12:53,761 DEV Perplexity: 21.9763
|
52 |
+
2024-07-29 08:12:53,761 New best score!
|
53 |
+
2024-07-29 08:12:53,762 ----------------------------------------------------------------------------------------------------
|
54 |
+
2024-07-29 08:12:53,762 EPOCH 2
|
55 |
+
2024-07-29 08:13:52,690 batch 104/1044 - loss 2.18471333 - lr 0.0010 - time 58.93s
|
56 |
+
2024-07-29 08:14:49,182 batch 208/1044 - loss 2.17075370 - lr 0.0010 - time 115.42s
|
57 |
+
2024-07-29 08:15:46,935 batch 312/1044 - loss 2.16476290 - lr 0.0010 - time 173.17s
|
58 |
+
2024-07-29 08:16:43,859 batch 416/1044 - loss 2.15894632 - lr 0.0010 - time 230.10s
|
59 |
+
2024-07-29 08:17:40,100 batch 520/1044 - loss 2.15486173 - lr 0.0010 - time 286.34s
|
60 |
+
2024-07-29 08:18:36,802 batch 624/1044 - loss 2.15043282 - lr 0.0010 - time 343.04s
|
61 |
+
2024-07-29 08:19:30,048 batch 728/1044 - loss 2.14694826 - lr 0.0010 - time 396.29s
|
62 |
+
2024-07-29 08:20:28,026 batch 832/1044 - loss 2.14419594 - lr 0.0010 - time 454.26s
|
63 |
+
2024-07-29 08:21:25,600 batch 936/1044 - loss 2.14010675 - lr 0.0010 - time 511.84s
|
64 |
+
2024-07-29 08:22:25,420 batch 1040/1044 - loss 2.13670866 - lr 0.0010 - time 571.66s
|
65 |
+
2024-07-29 08:22:27,758 ----------------------------------------------------------------------------------------------------
|
66 |
+
2024-07-29 08:22:27,762 EPOCH 2 DONE
|
67 |
+
2024-07-29 08:22:55,713 TRAIN Loss: 2.1365
|
68 |
+
2024-07-29 08:22:55,714 DEV Loss: 3.1892
|
69 |
+
2024-07-29 08:22:55,714 DEV Perplexity: 24.2695
|
70 |
+
2024-07-29 08:22:55,714 No improvement for 1 epoch(s)
|
71 |
+
2024-07-29 08:22:55,714 ----------------------------------------------------------------------------------------------------
|
72 |
+
2024-07-29 08:22:55,714 EPOCH 3
|
73 |
+
2024-07-29 08:23:53,091 batch 104/1044 - loss 2.08751571 - lr 0.0010 - time 57.38s
|
74 |
+
2024-07-29 08:24:50,619 batch 208/1044 - loss 2.08733297 - lr 0.0010 - time 114.90s
|
75 |
+
2024-07-29 08:25:48,704 batch 312/1044 - loss 2.08495532 - lr 0.0010 - time 172.99s
|
76 |
+
2024-07-29 08:26:46,137 batch 416/1044 - loss 2.08294034 - lr 0.0010 - time 230.42s
|
77 |
+
2024-07-29 08:27:41,812 batch 520/1044 - loss 2.08286387 - lr 0.0010 - time 286.10s
|
78 |
+
2024-07-29 08:28:37,415 batch 624/1044 - loss 2.07837076 - lr 0.0010 - time 341.70s
|
79 |
+
2024-07-29 08:29:35,773 batch 728/1044 - loss 2.07550259 - lr 0.0010 - time 400.06s
|
80 |
+
2024-07-29 08:30:32,773 batch 832/1044 - loss 2.07277058 - lr 0.0010 - time 457.06s
|
81 |
+
2024-07-29 08:31:31,914 batch 936/1044 - loss 2.06922043 - lr 0.0010 - time 516.20s
|
82 |
+
2024-07-29 08:32:27,776 batch 1040/1044 - loss 2.06737263 - lr 0.0010 - time 572.06s
|
83 |
+
2024-07-29 08:32:29,985 ----------------------------------------------------------------------------------------------------
|
84 |
+
2024-07-29 08:32:29,987 EPOCH 3 DONE
|
85 |
+
2024-07-29 08:32:58,150 TRAIN Loss: 2.0673
|
86 |
+
2024-07-29 08:32:58,150 DEV Loss: 3.2107
|
87 |
+
2024-07-29 08:32:58,150 DEV Perplexity: 24.7975
|
88 |
+
2024-07-29 08:32:58,150 No improvement for 2 epoch(s)
|
89 |
+
2024-07-29 08:32:58,150 ----------------------------------------------------------------------------------------------------
|
90 |
+
2024-07-29 08:32:58,150 EPOCH 4
|
91 |
+
2024-07-29 08:33:55,013 batch 104/1044 - loss 2.04089623 - lr 0.0010 - time 56.86s
|
92 |
+
2024-07-29 08:34:52,898 batch 208/1044 - loss 2.03903778 - lr 0.0010 - time 114.75s
|
93 |
+
2024-07-29 08:35:51,119 batch 312/1044 - loss 2.03777666 - lr 0.0010 - time 172.97s
|
94 |
+
2024-07-29 08:36:48,063 batch 416/1044 - loss 2.03265216 - lr 0.0010 - time 229.91s
|
95 |
+
2024-07-29 08:37:43,123 batch 520/1044 - loss 2.03068389 - lr 0.0010 - time 284.97s
|
96 |
+
2024-07-29 08:38:42,281 batch 624/1044 - loss 2.02925459 - lr 0.0010 - time 344.13s
|
97 |
+
2024-07-29 08:39:38,619 batch 728/1044 - loss 2.02635143 - lr 0.0010 - time 400.47s
|
98 |
+
2024-07-29 08:40:34,110 batch 832/1044 - loss 2.02490569 - lr 0.0010 - time 455.96s
|
99 |
+
2024-07-29 08:41:30,332 batch 936/1044 - loss 2.02244815 - lr 0.0010 - time 512.18s
|
100 |
+
2024-07-29 08:42:26,605 batch 1040/1044 - loss 2.02155263 - lr 0.0010 - time 568.45s
|
101 |
+
2024-07-29 08:42:28,905 ----------------------------------------------------------------------------------------------------
|
102 |
+
2024-07-29 08:42:28,908 EPOCH 4 DONE
|
103 |
+
2024-07-29 08:42:56,907 TRAIN Loss: 2.0215
|
104 |
+
2024-07-29 08:42:56,908 DEV Loss: 3.3884
|
105 |
+
2024-07-29 08:42:56,908 DEV Perplexity: 29.6186
|
106 |
+
2024-07-29 08:42:56,908 No improvement for 3 epoch(s)
|
107 |
+
2024-07-29 08:42:56,908 ----------------------------------------------------------------------------------------------------
|
108 |
+
2024-07-29 08:42:56,908 EPOCH 5
|
109 |
+
2024-07-29 08:43:54,221 batch 104/1044 - loss 1.99417387 - lr 0.0010 - time 57.31s
|
110 |
+
2024-07-29 08:44:52,997 batch 208/1044 - loss 1.98792041 - lr 0.0010 - time 116.09s
|
111 |
+
2024-07-29 08:45:48,150 batch 312/1044 - loss 1.99154850 - lr 0.0010 - time 171.24s
|
112 |
+
2024-07-29 08:46:45,419 batch 416/1044 - loss 1.99533101 - lr 0.0010 - time 228.51s
|
113 |
+
2024-07-29 08:47:44,326 batch 520/1044 - loss 1.99671145 - lr 0.0010 - time 287.42s
|
114 |
+
2024-07-29 08:48:42,269 batch 624/1044 - loss 1.99625001 - lr 0.0010 - time 345.36s
|
115 |
+
2024-07-29 08:49:37,222 batch 728/1044 - loss 1.99431187 - lr 0.0010 - time 400.31s
|
116 |
+
2024-07-29 08:50:32,593 batch 832/1044 - loss 1.99355745 - lr 0.0010 - time 455.68s
|
117 |
+
2024-07-29 08:51:28,854 batch 936/1044 - loss 1.99387271 - lr 0.0010 - time 511.95s
|
118 |
+
2024-07-29 08:52:26,219 batch 1040/1044 - loss 1.99341333 - lr 0.0010 - time 569.31s
|
119 |
+
2024-07-29 08:52:28,341 ----------------------------------------------------------------------------------------------------
|
120 |
+
2024-07-29 08:52:28,343 EPOCH 5 DONE
|
121 |
+
2024-07-29 08:52:56,407 TRAIN Loss: 1.9933
|
122 |
+
2024-07-29 08:52:56,407 DEV Loss: 3.4417
|
123 |
+
2024-07-29 08:52:56,407 DEV Perplexity: 31.2411
|
124 |
+
2024-07-29 08:52:56,408 No improvement for 4 epoch(s)
|
125 |
+
2024-07-29 08:52:56,408 ----------------------------------------------------------------------------------------------------
|
126 |
+
2024-07-29 08:52:56,408 EPOCH 6
|
127 |
+
2024-07-29 08:53:53,603 batch 104/1044 - loss 1.94269003 - lr 0.0001 - time 57.20s
|
128 |
+
2024-07-29 08:54:47,873 batch 208/1044 - loss 1.94458400 - lr 0.0001 - time 111.47s
|
129 |
+
2024-07-29 08:55:46,532 batch 312/1044 - loss 1.94421510 - lr 0.0001 - time 170.12s
|
130 |
+
2024-07-29 08:56:43,219 batch 416/1044 - loss 1.94502676 - lr 0.0001 - time 226.81s
|
131 |
+
2024-07-29 08:57:40,699 batch 520/1044 - loss 1.94408302 - lr 0.0001 - time 284.29s
|
132 |
+
2024-07-29 08:58:39,775 batch 624/1044 - loss 1.94322916 - lr 0.0001 - time 343.37s
|
133 |
+
2024-07-29 08:59:37,572 batch 728/1044 - loss 1.94293834 - lr 0.0001 - time 401.16s
|
134 |
+
2024-07-29 09:00:35,569 batch 832/1044 - loss 1.94358721 - lr 0.0001 - time 459.16s
|
135 |
+
2024-07-29 09:01:31,771 batch 936/1044 - loss 1.94235871 - lr 0.0001 - time 515.36s
|
136 |
+
2024-07-29 09:02:29,148 batch 1040/1044 - loss 1.94160419 - lr 0.0001 - time 572.74s
|
137 |
+
2024-07-29 09:02:31,339 ----------------------------------------------------------------------------------------------------
|
138 |
+
2024-07-29 09:02:31,341 EPOCH 6 DONE
|
139 |
+
2024-07-29 09:02:59,432 TRAIN Loss: 1.9417
|
140 |
+
2024-07-29 09:02:59,433 DEV Loss: 3.2860
|
141 |
+
2024-07-29 09:02:59,433 DEV Perplexity: 26.7353
|
142 |
+
2024-07-29 09:02:59,433 No improvement for 5 epoch(s)
|
143 |
+
2024-07-29 09:02:59,433 Patience reached: Terminating model training due to early stopping
|
144 |
+
2024-07-29 09:02:59,433 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-29 09:02:59,433 Finished Training
|
146 |
+
2024-07-29 09:03:55,129 TEST Perplexity: 21.9740
|
147 |
+
2024-07-29 09:14:15,480 TEST BLEU = 4.83 42.3/11.5/2.1/0.5 (BP = 1.000 ratio = 1.000 hyp_len = 97 ref_len = 97)
|
models/en2de/character_end2end_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f6b8536b96b9ae7267b7186adda9c4bcb5a38e64422c2bcfb76ffc8f9fbfa75
|
3 |
+
size 35800820
|
models/en2de/word_end2end_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 09:14:44,992 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 09:14:44,992 Training Model
|
3 |
+
2024-07-29 09:14:44,992 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 09:14:44,992 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(14303, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(22834, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=22834, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-29 09:14:44,992 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-29 09:14:44,992 Training Hyperparameters:
|
27 |
+
2024-07-29 09:14:44,992 - max_epochs: 10
|
28 |
+
2024-07-29 09:14:44,992 - learning_rate: 0.001
|
29 |
+
2024-07-29 09:14:44,992 - batch_size: 128
|
30 |
+
2024-07-29 09:14:44,992 - patience: 5
|
31 |
+
2024-07-29 09:14:44,992 - scheduler_patience: 3
|
32 |
+
2024-07-29 09:14:44,992 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-29 09:14:44,992 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-29 09:14:44,992 Computational Parameters:
|
35 |
+
2024-07-29 09:14:44,992 - num_workers: 4
|
36 |
+
2024-07-29 09:14:44,992 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-29 09:14:44,992 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-29 09:14:44,992 Dataset Splits:
|
39 |
+
2024-07-29 09:14:44,992 - train: 133623 data points
|
40 |
+
2024-07-29 09:14:44,992 - dev: 19090 data points
|
41 |
+
2024-07-29 09:14:44,992 - test: 38179 data points
|
42 |
+
2024-07-29 09:14:44,992 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-29 09:14:44,992 EPOCH 1
|
44 |
+
2024-07-29 09:15:31,214 batch 104/1044 - loss 6.68762229 - lr 0.0010 - time 46.22s
|
45 |
+
2024-07-29 09:16:13,594 batch 208/1044 - loss 6.40201276 - lr 0.0010 - time 88.60s
|
46 |
+
2024-07-29 09:17:01,978 batch 312/1044 - loss 6.22368844 - lr 0.0010 - time 136.99s
|
47 |
+
2024-07-29 09:17:50,779 batch 416/1044 - loss 6.07568611 - lr 0.0010 - time 185.79s
|
48 |
+
2024-07-29 09:18:39,278 batch 520/1044 - loss 5.94940980 - lr 0.0010 - time 234.29s
|
49 |
+
2024-07-29 09:19:27,693 batch 624/1044 - loss 5.84451641 - lr 0.0010 - time 282.70s
|
50 |
+
2024-07-29 09:20:12,651 batch 728/1044 - loss 5.74972569 - lr 0.0010 - time 327.66s
|
51 |
+
2024-07-29 09:20:56,581 batch 832/1044 - loss 5.66821399 - lr 0.0010 - time 371.59s
|
52 |
+
2024-07-29 09:21:46,421 batch 936/1044 - loss 5.59120232 - lr 0.0010 - time 421.43s
|
53 |
+
2024-07-29 09:22:32,658 batch 1040/1044 - loss 5.52066185 - lr 0.0010 - time 467.67s
|
54 |
+
2024-07-29 09:22:34,966 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-29 09:22:34,967 EPOCH 1 DONE
|
56 |
+
2024-07-29 09:22:44,703 TRAIN Loss: 5.5176
|
57 |
+
2024-07-29 09:22:44,703 DEV Loss: 5.6851
|
58 |
+
2024-07-29 09:22:44,703 DEV Perplexity: 294.4404
|
59 |
+
2024-07-29 09:22:44,703 New best score!
|
60 |
+
2024-07-29 09:22:44,704 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-29 09:22:44,704 EPOCH 2
|
62 |
+
2024-07-29 09:23:30,459 batch 104/1044 - loss 4.70927820 - lr 0.0010 - time 45.75s
|
63 |
+
2024-07-29 09:24:14,570 batch 208/1044 - loss 4.67138333 - lr 0.0010 - time 89.87s
|
64 |
+
2024-07-29 09:25:04,646 batch 312/1044 - loss 4.64380088 - lr 0.0010 - time 139.94s
|
65 |
+
2024-07-29 09:25:53,202 batch 416/1044 - loss 4.61442350 - lr 0.0010 - time 188.50s
|
66 |
+
2024-07-29 09:26:40,127 batch 520/1044 - loss 4.58126969 - lr 0.0010 - time 235.42s
|
67 |
+
2024-07-29 09:27:23,730 batch 624/1044 - loss 4.56022843 - lr 0.0010 - time 279.03s
|
68 |
+
2024-07-29 09:28:10,576 batch 728/1044 - loss 4.53555526 - lr 0.0010 - time 325.87s
|
69 |
+
2024-07-29 09:28:57,617 batch 832/1044 - loss 4.51384200 - lr 0.0010 - time 372.91s
|
70 |
+
2024-07-29 09:29:44,095 batch 936/1044 - loss 4.49083310 - lr 0.0010 - time 419.39s
|
71 |
+
2024-07-29 09:30:30,312 batch 1040/1044 - loss 4.46784579 - lr 0.0010 - time 465.61s
|
72 |
+
2024-07-29 09:30:32,342 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-29 09:30:32,343 EPOCH 2 DONE
|
74 |
+
2024-07-29 09:30:42,331 TRAIN Loss: 4.4668
|
75 |
+
2024-07-29 09:30:42,332 DEV Loss: 5.4737
|
76 |
+
2024-07-29 09:30:42,332 DEV Perplexity: 238.3490
|
77 |
+
2024-07-29 09:30:42,332 New best score!
|
78 |
+
2024-07-29 09:30:42,333 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-29 09:30:42,333 EPOCH 3
|
80 |
+
2024-07-29 09:31:28,557 batch 104/1044 - loss 4.09782187 - lr 0.0010 - time 46.22s
|
81 |
+
2024-07-29 09:32:13,046 batch 208/1044 - loss 4.07343847 - lr 0.0010 - time 90.71s
|
82 |
+
2024-07-29 09:33:01,218 batch 312/1044 - loss 4.06736985 - lr 0.0010 - time 138.89s
|
83 |
+
2024-07-29 09:33:48,591 batch 416/1044 - loss 4.06640469 - lr 0.0010 - time 186.26s
|
84 |
+
2024-07-29 09:34:34,291 batch 520/1044 - loss 4.05470543 - lr 0.0010 - time 231.96s
|
85 |
+
2024-07-29 09:35:18,867 batch 624/1044 - loss 4.04913080 - lr 0.0010 - time 276.53s
|
86 |
+
2024-07-29 09:36:05,310 batch 728/1044 - loss 4.03836787 - lr 0.0010 - time 322.98s
|
87 |
+
2024-07-29 09:36:52,483 batch 832/1044 - loss 4.02993985 - lr 0.0010 - time 370.15s
|
88 |
+
2024-07-29 09:37:45,596 batch 936/1044 - loss 4.02110605 - lr 0.0010 - time 423.26s
|
89 |
+
2024-07-29 09:38:32,022 batch 1040/1044 - loss 4.01265615 - lr 0.0010 - time 469.69s
|
90 |
+
2024-07-29 09:38:33,788 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-29 09:38:33,789 EPOCH 3 DONE
|
92 |
+
2024-07-29 09:38:43,523 TRAIN Loss: 4.0125
|
93 |
+
2024-07-29 09:38:43,523 DEV Loss: 5.3448
|
94 |
+
2024-07-29 09:38:43,523 DEV Perplexity: 209.5177
|
95 |
+
2024-07-29 09:38:43,523 New best score!
|
96 |
+
2024-07-29 09:38:43,525 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-29 09:38:43,525 EPOCH 4
|
98 |
+
2024-07-29 09:39:30,022 batch 104/1044 - loss 3.75320538 - lr 0.0010 - time 46.50s
|
99 |
+
2024-07-29 09:40:13,650 batch 208/1044 - loss 3.75565803 - lr 0.0010 - time 90.13s
|
100 |
+
2024-07-29 09:41:03,234 batch 312/1044 - loss 3.75648206 - lr 0.0010 - time 139.71s
|
101 |
+
2024-07-29 09:41:48,285 batch 416/1044 - loss 3.75922136 - lr 0.0010 - time 184.76s
|
102 |
+
2024-07-29 09:42:35,725 batch 520/1044 - loss 3.76481164 - lr 0.0010 - time 232.20s
|
103 |
+
2024-07-29 09:43:23,771 batch 624/1044 - loss 3.76968772 - lr 0.0010 - time 280.25s
|
104 |
+
2024-07-29 09:44:10,270 batch 728/1044 - loss 3.76599022 - lr 0.0010 - time 326.75s
|
105 |
+
2024-07-29 09:44:53,931 batch 832/1044 - loss 3.76276653 - lr 0.0010 - time 370.41s
|
106 |
+
2024-07-29 09:45:42,287 batch 936/1044 - loss 3.76076873 - lr 0.0010 - time 418.76s
|
107 |
+
2024-07-29 09:46:28,290 batch 1040/1044 - loss 3.75842409 - lr 0.0010 - time 464.77s
|
108 |
+
2024-07-29 09:46:29,970 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-29 09:46:29,971 EPOCH 4 DONE
|
110 |
+
2024-07-29 09:46:39,744 TRAIN Loss: 3.7582
|
111 |
+
2024-07-29 09:46:39,744 DEV Loss: 5.4143
|
112 |
+
2024-07-29 09:46:39,744 DEV Perplexity: 224.5854
|
113 |
+
2024-07-29 09:46:39,744 No improvement for 1 epoch(s)
|
114 |
+
2024-07-29 09:46:39,744 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-29 09:46:39,744 EPOCH 5
|
116 |
+
2024-07-29 09:47:25,640 batch 104/1044 - loss 3.56488176 - lr 0.0010 - time 45.90s
|
117 |
+
2024-07-29 09:48:14,673 batch 208/1044 - loss 3.56000491 - lr 0.0010 - time 94.93s
|
118 |
+
2024-07-29 09:48:58,050 batch 312/1044 - loss 3.57685460 - lr 0.0010 - time 138.31s
|
119 |
+
2024-07-29 09:49:43,738 batch 416/1044 - loss 3.58028894 - lr 0.0010 - time 183.99s
|
120 |
+
2024-07-29 09:50:31,676 batch 520/1044 - loss 3.57720232 - lr 0.0010 - time 231.93s
|
121 |
+
2024-07-29 09:51:18,904 batch 624/1044 - loss 3.57581482 - lr 0.0010 - time 279.16s
|
122 |
+
2024-07-29 09:52:03,833 batch 728/1044 - loss 3.57699984 - lr 0.0010 - time 324.09s
|
123 |
+
2024-07-29 09:52:51,054 batch 832/1044 - loss 3.57911868 - lr 0.0010 - time 371.31s
|
124 |
+
2024-07-29 09:53:37,238 batch 936/1044 - loss 3.57788490 - lr 0.0010 - time 417.49s
|
125 |
+
2024-07-29 09:54:24,863 batch 1040/1044 - loss 3.57766181 - lr 0.0010 - time 465.12s
|
126 |
+
2024-07-29 09:54:26,601 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-29 09:54:26,602 EPOCH 5 DONE
|
128 |
+
2024-07-29 09:54:36,652 TRAIN Loss: 3.5769
|
129 |
+
2024-07-29 09:54:36,653 DEV Loss: 5.4411
|
130 |
+
2024-07-29 09:54:36,653 DEV Perplexity: 230.6889
|
131 |
+
2024-07-29 09:54:36,653 No improvement for 2 epoch(s)
|
132 |
+
2024-07-29 09:54:36,653 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-29 09:54:36,653 EPOCH 6
|
134 |
+
2024-07-29 09:55:22,723 batch 104/1044 - loss 3.39798842 - lr 0.0010 - time 46.07s
|
135 |
+
2024-07-29 09:56:07,779 batch 208/1044 - loss 3.43187746 - lr 0.0010 - time 91.13s
|
136 |
+
2024-07-29 09:56:52,164 batch 312/1044 - loss 3.43835380 - lr 0.0010 - time 135.51s
|
137 |
+
2024-07-29 09:57:36,906 batch 416/1044 - loss 3.43365484 - lr 0.0010 - time 180.25s
|
138 |
+
2024-07-29 09:58:22,708 batch 520/1044 - loss 3.43188294 - lr 0.0010 - time 226.05s
|
139 |
+
2024-07-29 09:59:10,056 batch 624/1044 - loss 3.43404266 - lr 0.0010 - time 273.40s
|
140 |
+
2024-07-29 09:59:56,703 batch 728/1044 - loss 3.43836744 - lr 0.0010 - time 320.05s
|
141 |
+
2024-07-29 10:00:44,029 batch 832/1044 - loss 3.43877697 - lr 0.0010 - time 367.38s
|
142 |
+
2024-07-29 10:01:33,532 batch 936/1044 - loss 3.44485102 - lr 0.0010 - time 416.88s
|
143 |
+
2024-07-29 10:02:20,897 batch 1040/1044 - loss 3.44382606 - lr 0.0010 - time 464.24s
|
144 |
+
2024-07-29 10:02:22,705 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-29 10:02:22,707 EPOCH 6 DONE
|
146 |
+
2024-07-29 10:02:32,569 TRAIN Loss: 3.4442
|
147 |
+
2024-07-29 10:02:32,570 DEV Loss: 5.2779
|
148 |
+
2024-07-29 10:02:32,570 DEV Perplexity: 195.9482
|
149 |
+
2024-07-29 10:02:32,570 New best score!
|
150 |
+
2024-07-29 10:02:32,571 ----------------------------------------------------------------------------------------------------
|
151 |
+
2024-07-29 10:02:32,571 EPOCH 7
|
152 |
+
2024-07-29 10:03:19,357 batch 104/1044 - loss 3.29575888 - lr 0.0010 - time 46.79s
|
153 |
+
2024-07-29 10:04:04,483 batch 208/1044 - loss 3.29152036 - lr 0.0010 - time 91.91s
|
154 |
+
2024-07-29 10:04:51,913 batch 312/1044 - loss 3.28376382 - lr 0.0010 - time 139.34s
|
155 |
+
2024-07-29 10:05:40,678 batch 416/1044 - loss 3.29208179 - lr 0.0010 - time 188.11s
|
156 |
+
2024-07-29 10:06:24,489 batch 520/1044 - loss 3.29858603 - lr 0.0010 - time 231.92s
|
157 |
+
2024-07-29 10:07:10,490 batch 624/1044 - loss 3.30361310 - lr 0.0010 - time 277.92s
|
158 |
+
2024-07-29 10:07:54,971 batch 728/1044 - loss 3.31160711 - lr 0.0010 - time 322.40s
|
159 |
+
2024-07-29 10:08:42,467 batch 832/1044 - loss 3.31577718 - lr 0.0010 - time 369.90s
|
160 |
+
2024-07-29 10:09:31,183 batch 936/1044 - loss 3.32118058 - lr 0.0010 - time 418.61s
|
161 |
+
2024-07-29 10:10:16,777 batch 1040/1044 - loss 3.32573676 - lr 0.0010 - time 464.21s
|
162 |
+
2024-07-29 10:10:18,375 ----------------------------------------------------------------------------------------------------
|
163 |
+
2024-07-29 10:10:18,377 EPOCH 7 DONE
|
164 |
+
2024-07-29 10:10:28,406 TRAIN Loss: 3.3257
|
165 |
+
2024-07-29 10:10:28,406 DEV Loss: 5.2686
|
166 |
+
2024-07-29 10:10:28,406 DEV Perplexity: 194.1387
|
167 |
+
2024-07-29 10:10:28,406 New best score!
|
168 |
+
2024-07-29 10:10:28,407 ----------------------------------------------------------------------------------------------------
|
169 |
+
2024-07-29 10:10:28,407 EPOCH 8
|
170 |
+
2024-07-29 10:11:17,920 batch 104/1044 - loss 3.16426933 - lr 0.0010 - time 49.51s
|
171 |
+
2024-07-29 10:12:02,554 batch 208/1044 - loss 3.20414807 - lr 0.0010 - time 94.15s
|
172 |
+
2024-07-29 10:12:48,729 batch 312/1044 - loss 3.20626744 - lr 0.0010 - time 140.32s
|
173 |
+
2024-07-29 10:13:35,177 batch 416/1044 - loss 3.21369808 - lr 0.0010 - time 186.77s
|
174 |
+
2024-07-29 10:14:19,058 batch 520/1044 - loss 3.22363345 - lr 0.0010 - time 230.65s
|
175 |
+
2024-07-29 10:15:05,021 batch 624/1044 - loss 3.22743286 - lr 0.0010 - time 276.61s
|
176 |
+
2024-07-29 10:15:51,259 batch 728/1044 - loss 3.23338501 - lr 0.0010 - time 322.85s
|
177 |
+
2024-07-29 10:16:37,192 batch 832/1044 - loss 3.24250125 - lr 0.0010 - time 368.78s
|
178 |
+
2024-07-29 10:17:24,202 batch 936/1044 - loss 3.24898902 - lr 0.0010 - time 415.79s
|
179 |
+
2024-07-29 10:18:12,656 batch 1040/1044 - loss 3.25282626 - lr 0.0010 - time 464.25s
|
180 |
+
2024-07-29 10:18:14,655 ----------------------------------------------------------------------------------------------------
|
181 |
+
2024-07-29 10:18:14,657 EPOCH 8 DONE
|
182 |
+
2024-07-29 10:18:24,423 TRAIN Loss: 3.2528
|
183 |
+
2024-07-29 10:18:24,423 DEV Loss: 5.4322
|
184 |
+
2024-07-29 10:18:24,423 DEV Perplexity: 228.6570
|
185 |
+
2024-07-29 10:18:24,423 No improvement for 1 epoch(s)
|
186 |
+
2024-07-29 10:18:24,423 ----------------------------------------------------------------------------------------------------
|
187 |
+
2024-07-29 10:18:24,423 EPOCH 9
|
188 |
+
2024-07-29 10:19:11,257 batch 104/1044 - loss 3.11123835 - lr 0.0010 - time 46.83s
|
189 |
+
2024-07-29 10:19:56,480 batch 208/1044 - loss 3.11838686 - lr 0.0010 - time 92.06s
|
190 |
+
2024-07-29 10:20:41,453 batch 312/1044 - loss 3.12775021 - lr 0.0010 - time 137.03s
|
191 |
+
2024-07-29 10:21:29,264 batch 416/1044 - loss 3.13006304 - lr 0.0010 - time 184.84s
|
192 |
+
2024-07-29 10:22:13,410 batch 520/1044 - loss 3.13652410 - lr 0.0010 - time 228.99s
|
193 |
+
2024-07-29 10:23:01,104 batch 624/1044 - loss 3.14098946 - lr 0.0010 - time 276.68s
|
194 |
+
2024-07-29 10:23:49,984 batch 728/1044 - loss 3.15212496 - lr 0.0010 - time 325.56s
|
195 |
+
2024-07-29 10:24:35,093 batch 832/1044 - loss 3.15861385 - lr 0.0010 - time 370.67s
|
196 |
+
2024-07-29 10:25:22,475 batch 936/1044 - loss 3.16115363 - lr 0.0010 - time 418.05s
|
197 |
+
2024-07-29 10:26:10,608 batch 1040/1044 - loss 3.16771574 - lr 0.0010 - time 466.18s
|
198 |
+
2024-07-29 10:26:12,777 ----------------------------------------------------------------------------------------------------
|
199 |
+
2024-07-29 10:26:12,779 EPOCH 9 DONE
|
200 |
+
2024-07-29 10:26:22,507 TRAIN Loss: 3.1674
|
201 |
+
2024-07-29 10:26:22,507 DEV Loss: 5.3844
|
202 |
+
2024-07-29 10:26:22,508 DEV Perplexity: 217.9797
|
203 |
+
2024-07-29 10:26:22,508 No improvement for 2 epoch(s)
|
204 |
+
2024-07-29 10:26:22,508 ----------------------------------------------------------------------------------------------------
|
205 |
+
2024-07-29 10:26:22,508 EPOCH 10
|
206 |
+
2024-07-29 10:27:09,577 batch 104/1044 - loss 3.04956690 - lr 0.0010 - time 47.07s
|
207 |
+
2024-07-29 10:27:55,687 batch 208/1044 - loss 3.05954124 - lr 0.0010 - time 93.18s
|
208 |
+
2024-07-29 10:28:42,283 batch 312/1044 - loss 3.07472156 - lr 0.0010 - time 139.78s
|
209 |
+
2024-07-29 10:29:28,041 batch 416/1044 - loss 3.08242494 - lr 0.0010 - time 185.53s
|
210 |
+
2024-07-29 10:30:13,933 batch 520/1044 - loss 3.08639167 - lr 0.0010 - time 231.43s
|
211 |
+
2024-07-29 10:31:03,928 batch 624/1044 - loss 3.09298112 - lr 0.0010 - time 281.42s
|
212 |
+
2024-07-29 10:31:52,849 batch 728/1044 - loss 3.09874452 - lr 0.0010 - time 330.34s
|
213 |
+
2024-07-29 10:32:38,018 batch 832/1044 - loss 3.10353403 - lr 0.0010 - time 375.51s
|
214 |
+
2024-07-29 10:33:23,795 batch 936/1044 - loss 3.10650877 - lr 0.0010 - time 421.29s
|
215 |
+
2024-07-29 10:34:09,619 batch 1040/1044 - loss 3.10740027 - lr 0.0010 - time 467.11s
|
216 |
+
2024-07-29 10:34:11,922 ----------------------------------------------------------------------------------------------------
|
217 |
+
2024-07-29 10:34:11,923 EPOCH 10 DONE
|
218 |
+
2024-07-29 10:34:21,779 TRAIN Loss: 3.1072
|
219 |
+
2024-07-29 10:34:21,780 DEV Loss: 5.3914
|
220 |
+
2024-07-29 10:34:21,780 DEV Perplexity: 219.5024
|
221 |
+
2024-07-29 10:34:21,780 No improvement for 3 epoch(s)
|
222 |
+
2024-07-29 10:34:21,780 ----------------------------------------------------------------------------------------------------
|
223 |
+
2024-07-29 10:34:21,780 Finished Training
|
224 |
+
2024-07-29 10:34:40,855 TEST Perplexity: 193.6740
|
225 |
+
2024-07-29 10:41:21,550 TEST BLEU = 32.52 86.7/62.7/29.3/7.0 (BP = 1.000 ratio = 1.000 hyp_len = 60 ref_len = 60)
|
models/en2de/word_end2end_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:352fe682763ff8fcd1e8134a044cfa17ba55daf487b50cca2a76ec9a8b5e04f8
|
3 |
+
size 107988840
|
models/en2de/word_end2end_embeddings_without_attention/log.txt
ADDED
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 10:41:51,295 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 10:41:51,295 Training Model
|
3 |
+
2024-07-29 10:41:51,295 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 10:41:51,295 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(14303, 300, padding_idx=0)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True, bidirectional=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(22834, 300, padding_idx=0)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 1024, batch_first=True)
|
14 |
+
(hidden2vocab): Linear(in_features=1024, out_features=22834, bias=True)
|
15 |
+
(log_softmax): LogSoftmax(dim=-1)
|
16 |
+
)
|
17 |
+
)
|
18 |
+
2024-07-29 10:41:51,295 ----------------------------------------------------------------------------------------------------
|
19 |
+
2024-07-29 10:41:51,295 Training Hyperparameters:
|
20 |
+
2024-07-29 10:41:51,295 - max_epochs: 10
|
21 |
+
2024-07-29 10:41:51,295 - learning_rate: 0.001
|
22 |
+
2024-07-29 10:41:51,295 - batch_size: 128
|
23 |
+
2024-07-29 10:41:51,295 - patience: 5
|
24 |
+
2024-07-29 10:41:51,295 - scheduler_patience: 3
|
25 |
+
2024-07-29 10:41:51,295 - teacher_forcing_ratio: 0.5
|
26 |
+
2024-07-29 10:41:51,295 ----------------------------------------------------------------------------------------------------
|
27 |
+
2024-07-29 10:41:51,295 Computational Parameters:
|
28 |
+
2024-07-29 10:41:51,295 - num_workers: 4
|
29 |
+
2024-07-29 10:41:51,295 - device: device(type='cuda', index=0)
|
30 |
+
2024-07-29 10:41:51,295 ----------------------------------------------------------------------------------------------------
|
31 |
+
2024-07-29 10:41:51,295 Dataset Splits:
|
32 |
+
2024-07-29 10:41:51,295 - train: 133623 data points
|
33 |
+
2024-07-29 10:41:51,295 - dev: 19090 data points
|
34 |
+
2024-07-29 10:41:51,296 - test: 38179 data points
|
35 |
+
2024-07-29 10:41:51,296 ----------------------------------------------------------------------------------------------------
|
36 |
+
2024-07-29 10:41:51,296 EPOCH 1
|
37 |
+
2024-07-29 10:42:43,980 batch 104/1044 - loss 6.56599054 - lr 0.0010 - time 52.68s
|
38 |
+
2024-07-29 10:43:35,196 batch 208/1044 - loss 6.28009422 - lr 0.0010 - time 103.90s
|
39 |
+
2024-07-29 10:44:25,168 batch 312/1044 - loss 6.11249907 - lr 0.0010 - time 153.87s
|
40 |
+
2024-07-29 10:45:15,557 batch 416/1044 - loss 5.99013720 - lr 0.0010 - time 204.26s
|
41 |
+
2024-07-29 10:46:02,970 batch 520/1044 - loss 5.89236221 - lr 0.0010 - time 251.67s
|
42 |
+
2024-07-29 10:46:51,664 batch 624/1044 - loss 5.81345889 - lr 0.0010 - time 300.37s
|
43 |
+
2024-07-29 10:47:42,555 batch 728/1044 - loss 5.74780520 - lr 0.0010 - time 351.26s
|
44 |
+
2024-07-29 10:48:33,483 batch 832/1044 - loss 5.69103370 - lr 0.0010 - time 402.19s
|
45 |
+
2024-07-29 10:49:22,573 batch 936/1044 - loss 5.63910694 - lr 0.0010 - time 451.28s
|
46 |
+
2024-07-29 10:50:14,318 batch 1040/1044 - loss 5.59255154 - lr 0.0010 - time 503.02s
|
47 |
+
2024-07-29 10:50:16,234 ----------------------------------------------------------------------------------------------------
|
48 |
+
2024-07-29 10:50:16,235 EPOCH 1 DONE
|
49 |
+
2024-07-29 10:50:29,064 TRAIN Loss: 5.5908
|
50 |
+
2024-07-29 10:50:29,064 DEV Loss: 5.7897
|
51 |
+
2024-07-29 10:50:29,064 DEV Perplexity: 326.8995
|
52 |
+
2024-07-29 10:50:29,064 New best score!
|
53 |
+
2024-07-29 10:50:29,065 ----------------------------------------------------------------------------------------------------
|
54 |
+
2024-07-29 10:50:29,065 EPOCH 2
|
55 |
+
2024-07-29 10:51:19,942 batch 104/1044 - loss 5.02739687 - lr 0.0010 - time 50.88s
|
56 |
+
2024-07-29 10:52:09,803 batch 208/1044 - loss 5.01800949 - lr 0.0010 - time 100.74s
|
57 |
+
2024-07-29 10:53:04,478 batch 312/1044 - loss 5.00509294 - lr 0.0010 - time 155.41s
|
58 |
+
2024-07-29 10:53:53,594 batch 416/1044 - loss 4.98731034 - lr 0.0010 - time 204.53s
|
59 |
+
2024-07-29 10:54:43,356 batch 520/1044 - loss 4.97219816 - lr 0.0010 - time 254.29s
|
60 |
+
2024-07-29 10:55:33,584 batch 624/1044 - loss 4.96074294 - lr 0.0010 - time 304.52s
|
61 |
+
2024-07-29 10:56:24,225 batch 728/1044 - loss 4.94472581 - lr 0.0010 - time 355.16s
|
62 |
+
2024-07-29 10:57:14,355 batch 832/1044 - loss 4.93236568 - lr 0.0010 - time 405.29s
|
63 |
+
2024-07-29 10:58:06,416 batch 936/1044 - loss 4.91768116 - lr 0.0010 - time 457.35s
|
64 |
+
2024-07-29 10:58:55,326 batch 1040/1044 - loss 4.90590350 - lr 0.0010 - time 506.26s
|
65 |
+
2024-07-29 10:58:57,793 ----------------------------------------------------------------------------------------------------
|
66 |
+
2024-07-29 10:58:57,794 EPOCH 2 DONE
|
67 |
+
2024-07-29 10:59:10,716 TRAIN Loss: 4.9057
|
68 |
+
2024-07-29 10:59:10,716 DEV Loss: 5.7132
|
69 |
+
2024-07-29 10:59:10,716 DEV Perplexity: 302.8460
|
70 |
+
2024-07-29 10:59:10,716 New best score!
|
71 |
+
2024-07-29 10:59:10,717 ----------------------------------------------------------------------------------------------------
|
72 |
+
2024-07-29 10:59:10,717 EPOCH 3
|
73 |
+
2024-07-29 11:00:00,967 batch 104/1044 - loss 4.61237117 - lr 0.0010 - time 50.25s
|
74 |
+
2024-07-29 11:00:50,203 batch 208/1044 - loss 4.62395983 - lr 0.0010 - time 99.49s
|
75 |
+
2024-07-29 11:01:40,773 batch 312/1044 - loss 4.61521491 - lr 0.0010 - time 150.06s
|
76 |
+
2024-07-29 11:02:39,135 batch 416/1044 - loss 4.61224452 - lr 0.0010 - time 208.42s
|
77 |
+
2024-07-29 11:03:30,115 batch 520/1044 - loss 4.60275617 - lr 0.0010 - time 259.40s
|
78 |
+
2024-07-29 11:04:16,479 batch 624/1044 - loss 4.59871728 - lr 0.0010 - time 305.76s
|
79 |
+
2024-07-29 11:05:07,213 batch 728/1044 - loss 4.59086315 - lr 0.0010 - time 356.50s
|
80 |
+
2024-07-29 11:05:57,731 batch 832/1044 - loss 4.58489406 - lr 0.0010 - time 407.01s
|
81 |
+
2024-07-29 11:06:46,315 batch 936/1044 - loss 4.57758889 - lr 0.0010 - time 455.60s
|
82 |
+
2024-07-29 11:07:34,550 batch 1040/1044 - loss 4.56970717 - lr 0.0010 - time 503.83s
|
83 |
+
2024-07-29 11:07:36,805 ----------------------------------------------------------------------------------------------------
|
84 |
+
2024-07-29 11:07:36,806 EPOCH 3 DONE
|
85 |
+
2024-07-29 11:07:49,727 TRAIN Loss: 4.5697
|
86 |
+
2024-07-29 11:07:49,728 DEV Loss: 5.5772
|
87 |
+
2024-07-29 11:07:49,728 DEV Perplexity: 264.3216
|
88 |
+
2024-07-29 11:07:49,728 New best score!
|
89 |
+
2024-07-29 11:07:49,729 ----------------------------------------------------------------------------------------------------
|
90 |
+
2024-07-29 11:07:49,729 EPOCH 4
|
91 |
+
2024-07-29 11:08:37,495 batch 104/1044 - loss 4.31793029 - lr 0.0010 - time 47.77s
|
92 |
+
2024-07-29 11:09:28,126 batch 208/1044 - loss 4.31731233 - lr 0.0010 - time 98.40s
|
93 |
+
2024-07-29 11:10:17,772 batch 312/1044 - loss 4.31730254 - lr 0.0010 - time 148.04s
|
94 |
+
2024-07-29 11:11:11,397 batch 416/1044 - loss 4.31653301 - lr 0.0010 - time 201.67s
|
95 |
+
2024-07-29 11:12:01,968 batch 520/1044 - loss 4.32179287 - lr 0.0010 - time 252.24s
|
96 |
+
2024-07-29 11:12:52,660 batch 624/1044 - loss 4.32694693 - lr 0.0010 - time 302.93s
|
97 |
+
2024-07-29 11:13:42,592 batch 728/1044 - loss 4.32466568 - lr 0.0010 - time 352.86s
|
98 |
+
2024-07-29 11:14:33,972 batch 832/1044 - loss 4.32141261 - lr 0.0010 - time 404.24s
|
99 |
+
2024-07-29 11:15:25,165 batch 936/1044 - loss 4.31979928 - lr 0.0010 - time 455.44s
|
100 |
+
2024-07-29 11:16:12,909 batch 1040/1044 - loss 4.31766206 - lr 0.0010 - time 503.18s
|
101 |
+
2024-07-29 11:16:14,722 ----------------------------------------------------------------------------------------------------
|
102 |
+
2024-07-29 11:16:14,723 EPOCH 4 DONE
|
103 |
+
2024-07-29 11:16:27,582 TRAIN Loss: 4.3179
|
104 |
+
2024-07-29 11:16:27,583 DEV Loss: 5.5061
|
105 |
+
2024-07-29 11:16:27,583 DEV Perplexity: 246.1892
|
106 |
+
2024-07-29 11:16:27,583 New best score!
|
107 |
+
2024-07-29 11:16:27,584 ----------------------------------------------------------------------------------------------------
|
108 |
+
2024-07-29 11:16:27,584 EPOCH 5
|
109 |
+
2024-07-29 11:17:19,390 batch 104/1044 - loss 4.10156497 - lr 0.0010 - time 51.81s
|
110 |
+
2024-07-29 11:18:08,736 batch 208/1044 - loss 4.09617276 - lr 0.0010 - time 101.15s
|
111 |
+
2024-07-29 11:18:57,718 batch 312/1044 - loss 4.10813874 - lr 0.0010 - time 150.13s
|
112 |
+
2024-07-29 11:19:53,482 batch 416/1044 - loss 4.11702962 - lr 0.0010 - time 205.90s
|
113 |
+
2024-07-29 11:20:43,773 batch 520/1044 - loss 4.11525546 - lr 0.0010 - time 256.19s
|
114 |
+
2024-07-29 11:21:34,723 batch 624/1044 - loss 4.11790551 - lr 0.0010 - time 307.14s
|
115 |
+
2024-07-29 11:22:23,462 batch 728/1044 - loss 4.12154044 - lr 0.0010 - time 355.88s
|
116 |
+
2024-07-29 11:23:11,115 batch 832/1044 - loss 4.12138260 - lr 0.0010 - time 403.53s
|
117 |
+
2024-07-29 11:24:00,337 batch 936/1044 - loss 4.12506736 - lr 0.0010 - time 452.75s
|
118 |
+
2024-07-29 11:24:50,964 batch 1040/1044 - loss 4.12429898 - lr 0.0010 - time 503.38s
|
119 |
+
2024-07-29 11:24:52,983 ----------------------------------------------------------------------------------------------------
|
120 |
+
2024-07-29 11:24:52,984 EPOCH 5 DONE
|
121 |
+
2024-07-29 11:25:05,723 TRAIN Loss: 4.1247
|
122 |
+
2024-07-29 11:25:05,723 DEV Loss: 5.4289
|
123 |
+
2024-07-29 11:25:05,723 DEV Perplexity: 227.8912
|
124 |
+
2024-07-29 11:25:05,723 New best score!
|
125 |
+
2024-07-29 11:25:05,724 ----------------------------------------------------------------------------------------------------
|
126 |
+
2024-07-29 11:25:05,724 EPOCH 6
|
127 |
+
2024-07-29 11:25:59,338 batch 104/1044 - loss 3.89071036 - lr 0.0010 - time 53.61s
|
128 |
+
2024-07-29 11:26:50,131 batch 208/1044 - loss 3.91066583 - lr 0.0010 - time 104.41s
|
129 |
+
2024-07-29 11:27:39,605 batch 312/1044 - loss 3.92001536 - lr 0.0010 - time 153.88s
|
130 |
+
2024-07-29 11:28:26,705 batch 416/1044 - loss 3.91852045 - lr 0.0010 - time 200.98s
|
131 |
+
2024-07-29 11:29:21,163 batch 520/1044 - loss 3.92671625 - lr 0.0010 - time 255.44s
|
132 |
+
2024-07-29 11:30:09,942 batch 624/1044 - loss 3.93454336 - lr 0.0010 - time 304.22s
|
133 |
+
2024-07-29 11:31:02,918 batch 728/1044 - loss 3.94077764 - lr 0.0010 - time 357.19s
|
134 |
+
2024-07-29 11:31:53,528 batch 832/1044 - loss 3.94676249 - lr 0.0010 - time 407.80s
|
135 |
+
2024-07-29 11:32:41,961 batch 936/1044 - loss 3.95203299 - lr 0.0010 - time 456.24s
|
136 |
+
2024-07-29 11:33:31,394 batch 1040/1044 - loss 3.95468071 - lr 0.0010 - time 505.67s
|
137 |
+
2024-07-29 11:33:33,260 ----------------------------------------------------------------------------------------------------
|
138 |
+
2024-07-29 11:33:33,261 EPOCH 6 DONE
|
139 |
+
2024-07-29 11:33:46,131 TRAIN Loss: 3.9546
|
140 |
+
2024-07-29 11:33:46,131 DEV Loss: 5.4532
|
141 |
+
2024-07-29 11:33:46,131 DEV Perplexity: 233.4940
|
142 |
+
2024-07-29 11:33:46,131 No improvement for 1 epoch(s)
|
143 |
+
2024-07-29 11:33:46,131 ----------------------------------------------------------------------------------------------------
|
144 |
+
2024-07-29 11:33:46,131 EPOCH 7
|
145 |
+
2024-07-29 11:34:36,037 batch 104/1044 - loss 3.75202120 - lr 0.0010 - time 49.91s
|
146 |
+
2024-07-29 11:35:23,211 batch 208/1044 - loss 3.76428310 - lr 0.0010 - time 97.08s
|
147 |
+
2024-07-29 11:36:15,737 batch 312/1044 - loss 3.76220069 - lr 0.0010 - time 149.61s
|
148 |
+
2024-07-29 11:37:06,599 batch 416/1044 - loss 3.76866076 - lr 0.0010 - time 200.47s
|
149 |
+
2024-07-29 11:37:57,102 batch 520/1044 - loss 3.78008501 - lr 0.0010 - time 250.97s
|
150 |
+
2024-07-29 11:38:48,470 batch 624/1044 - loss 3.78899940 - lr 0.0010 - time 302.34s
|
151 |
+
2024-07-29 11:39:37,561 batch 728/1044 - loss 3.79675758 - lr 0.0010 - time 351.43s
|
152 |
+
2024-07-29 11:40:26,884 batch 832/1044 - loss 3.80079628 - lr 0.0010 - time 400.75s
|
153 |
+
2024-07-29 11:41:15,559 batch 936/1044 - loss 3.80748021 - lr 0.0010 - time 449.43s
|
154 |
+
2024-07-29 11:42:04,567 batch 1040/1044 - loss 3.81313193 - lr 0.0010 - time 498.44s
|
155 |
+
2024-07-29 11:42:06,383 ----------------------------------------------------------------------------------------------------
|
156 |
+
2024-07-29 11:42:06,384 EPOCH 7 DONE
|
157 |
+
2024-07-29 11:42:19,326 TRAIN Loss: 3.8132
|
158 |
+
2024-07-29 11:42:19,326 DEV Loss: 5.4459
|
159 |
+
2024-07-29 11:42:19,326 DEV Perplexity: 231.8110
|
160 |
+
2024-07-29 11:42:19,326 No improvement for 2 epoch(s)
|
161 |
+
2024-07-29 11:42:19,326 ----------------------------------------------------------------------------------------------------
|
162 |
+
2024-07-29 11:42:19,326 EPOCH 8
|
163 |
+
2024-07-29 11:43:11,878 batch 104/1044 - loss 3.64161499 - lr 0.0010 - time 52.55s
|
164 |
+
2024-07-29 11:44:02,812 batch 208/1044 - loss 3.66078973 - lr 0.0010 - time 103.49s
|
165 |
+
2024-07-29 11:44:54,581 batch 312/1044 - loss 3.66940373 - lr 0.0010 - time 155.26s
|
166 |
+
2024-07-29 11:45:42,502 batch 416/1044 - loss 3.67283917 - lr 0.0010 - time 203.18s
|
167 |
+
2024-07-29 11:46:32,748 batch 520/1044 - loss 3.67896443 - lr 0.0010 - time 253.42s
|
168 |
+
2024-07-29 11:47:19,611 batch 624/1044 - loss 3.68378819 - lr 0.0010 - time 300.28s
|
169 |
+
2024-07-29 11:48:12,844 batch 728/1044 - loss 3.68957532 - lr 0.0010 - time 353.52s
|
170 |
+
2024-07-29 11:49:01,503 batch 832/1044 - loss 3.69448218 - lr 0.0010 - time 402.18s
|
171 |
+
2024-07-29 11:49:51,030 batch 936/1044 - loss 3.70412089 - lr 0.0010 - time 451.70s
|
172 |
+
2024-07-29 11:50:42,780 batch 1040/1044 - loss 3.70785985 - lr 0.0010 - time 503.45s
|
173 |
+
2024-07-29 11:50:44,516 ----------------------------------------------------------------------------------------------------
|
174 |
+
2024-07-29 11:50:44,517 EPOCH 8 DONE
|
175 |
+
2024-07-29 11:50:57,332 TRAIN Loss: 3.7082
|
176 |
+
2024-07-29 11:50:57,332 DEV Loss: 5.4909
|
177 |
+
2024-07-29 11:50:57,332 DEV Perplexity: 242.4722
|
178 |
+
2024-07-29 11:50:57,332 No improvement for 3 epoch(s)
|
179 |
+
2024-07-29 11:50:57,332 ----------------------------------------------------------------------------------------------------
|
180 |
+
2024-07-29 11:50:57,332 EPOCH 9
|
181 |
+
2024-07-29 11:51:48,335 batch 104/1044 - loss 3.51649693 - lr 0.0010 - time 51.00s
|
182 |
+
2024-07-29 11:52:36,237 batch 208/1044 - loss 3.53223671 - lr 0.0010 - time 98.90s
|
183 |
+
2024-07-29 11:53:24,958 batch 312/1044 - loss 3.54294675 - lr 0.0010 - time 147.63s
|
184 |
+
2024-07-29 11:54:15,450 batch 416/1044 - loss 3.55195141 - lr 0.0010 - time 198.12s
|
185 |
+
2024-07-29 11:55:05,536 batch 520/1044 - loss 3.55877144 - lr 0.0010 - time 248.20s
|
186 |
+
2024-07-29 11:56:00,052 batch 624/1044 - loss 3.56457711 - lr 0.0010 - time 302.72s
|
187 |
+
2024-07-29 11:56:51,205 batch 728/1044 - loss 3.57586517 - lr 0.0010 - time 353.87s
|
188 |
+
2024-07-29 11:57:39,310 batch 832/1044 - loss 3.58044331 - lr 0.0010 - time 401.98s
|
189 |
+
2024-07-29 11:58:31,929 batch 936/1044 - loss 3.58557119 - lr 0.0010 - time 454.60s
|
190 |
+
2024-07-29 11:59:20,796 batch 1040/1044 - loss 3.59259140 - lr 0.0010 - time 503.46s
|
191 |
+
2024-07-29 11:59:22,537 ----------------------------------------------------------------------------------------------------
|
192 |
+
2024-07-29 11:59:22,537 EPOCH 9 DONE
|
193 |
+
2024-07-29 11:59:35,430 TRAIN Loss: 3.5929
|
194 |
+
2024-07-29 11:59:35,430 DEV Loss: 5.5051
|
195 |
+
2024-07-29 11:59:35,430 DEV Perplexity: 245.9499
|
196 |
+
2024-07-29 11:59:35,430 No improvement for 4 epoch(s)
|
197 |
+
2024-07-29 11:59:35,430 ----------------------------------------------------------------------------------------------------
|
198 |
+
2024-07-29 11:59:35,430 EPOCH 10
|
199 |
+
2024-07-29 12:00:26,025 batch 104/1044 - loss 3.40724007 - lr 0.0001 - time 50.59s
|
200 |
+
2024-07-29 12:01:13,195 batch 208/1044 - loss 3.39549031 - lr 0.0001 - time 97.76s
|
201 |
+
2024-07-29 12:02:02,790 batch 312/1044 - loss 3.38177447 - lr 0.0001 - time 147.36s
|
202 |
+
2024-07-29 12:02:54,277 batch 416/1044 - loss 3.37592916 - lr 0.0001 - time 198.85s
|
203 |
+
2024-07-29 12:03:45,463 batch 520/1044 - loss 3.37005038 - lr 0.0001 - time 250.03s
|
204 |
+
2024-07-29 12:04:33,863 batch 624/1044 - loss 3.37062476 - lr 0.0001 - time 298.43s
|
205 |
+
2024-07-29 12:05:26,246 batch 728/1044 - loss 3.37177335 - lr 0.0001 - time 350.82s
|
206 |
+
2024-07-29 12:06:15,720 batch 832/1044 - loss 3.37051947 - lr 0.0001 - time 400.29s
|
207 |
+
2024-07-29 12:07:04,090 batch 936/1044 - loss 3.36859261 - lr 0.0001 - time 448.66s
|
208 |
+
2024-07-29 12:07:56,434 batch 1040/1044 - loss 3.36913985 - lr 0.0001 - time 501.00s
|
209 |
+
2024-07-29 12:07:58,775 ----------------------------------------------------------------------------------------------------
|
210 |
+
2024-07-29 12:07:58,776 EPOCH 10 DONE
|
211 |
+
2024-07-29 12:08:11,643 TRAIN Loss: 3.3688
|
212 |
+
2024-07-29 12:08:11,644 DEV Loss: 5.5078
|
213 |
+
2024-07-29 12:08:11,644 DEV Perplexity: 246.6117
|
214 |
+
2024-07-29 12:08:11,644 No improvement for 5 epoch(s)
|
215 |
+
2024-07-29 12:08:11,644 Patience reached: Terminating model training due to early stopping
|
216 |
+
2024-07-29 12:08:11,644 ----------------------------------------------------------------------------------------------------
|
217 |
+
2024-07-29 12:08:11,644 Finished Training
|
218 |
+
2024-07-29 12:08:36,837 TEST Perplexity: 227.3162
|
219 |
+
2024-07-29 12:11:57,875 TEST BLEU = 12.94 77.1/52.4/11.1/0.6 (BP = 1.000 ratio = 1.000 hyp_len = 83 ref_len = 83)
|
models/en2de/word_end2end_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a28fa40bacd635da90d0587d54fc761ad127ab8edbfb3264e019008fce65d99
|
3 |
+
size 174378612
|
models/en2de/word_word2vec_embeddings_with_attention/log.txt
ADDED
@@ -0,0 +1,226 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-07-29 12:12:08,122 ----------------------------------------------------------------------------------------------------
|
2 |
+
2024-07-29 12:12:08,122 Training Model
|
3 |
+
2024-07-29 12:12:08,122 ----------------------------------------------------------------------------------------------------
|
4 |
+
2024-07-29 12:12:08,122 Translator(
|
5 |
+
(encoder): EncoderLSTM(
|
6 |
+
(embedding): Embedding(14303, 300, padding_idx=14298)
|
7 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
8 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
9 |
+
)
|
10 |
+
(decoder): DecoderLSTM(
|
11 |
+
(embedding): Embedding(22834, 300, padding_idx=22829)
|
12 |
+
(dropout): Dropout(p=0.1, inplace=False)
|
13 |
+
(lstm): LSTM(300, 512, batch_first=True)
|
14 |
+
(attention): DotProductAttention(
|
15 |
+
(softmax): Softmax(dim=-1)
|
16 |
+
(combined2hidden): Sequential(
|
17 |
+
(0): Linear(in_features=1024, out_features=512, bias=True)
|
18 |
+
(1): ReLU()
|
19 |
+
)
|
20 |
+
)
|
21 |
+
(hidden2vocab): Linear(in_features=512, out_features=22834, bias=True)
|
22 |
+
(log_softmax): LogSoftmax(dim=-1)
|
23 |
+
)
|
24 |
+
)
|
25 |
+
2024-07-29 12:12:08,122 ----------------------------------------------------------------------------------------------------
|
26 |
+
2024-07-29 12:12:08,122 Training Hyperparameters:
|
27 |
+
2024-07-29 12:12:08,122 - max_epochs: 10
|
28 |
+
2024-07-29 12:12:08,122 - learning_rate: 0.001
|
29 |
+
2024-07-29 12:12:08,122 - batch_size: 128
|
30 |
+
2024-07-29 12:12:08,122 - patience: 5
|
31 |
+
2024-07-29 12:12:08,122 - scheduler_patience: 3
|
32 |
+
2024-07-29 12:12:08,122 - teacher_forcing_ratio: 0.5
|
33 |
+
2024-07-29 12:12:08,122 ----------------------------------------------------------------------------------------------------
|
34 |
+
2024-07-29 12:12:08,122 Computational Parameters:
|
35 |
+
2024-07-29 12:12:08,122 - num_workers: 4
|
36 |
+
2024-07-29 12:12:08,122 - device: device(type='cuda', index=0)
|
37 |
+
2024-07-29 12:12:08,122 ----------------------------------------------------------------------------------------------------
|
38 |
+
2024-07-29 12:12:08,122 Dataset Splits:
|
39 |
+
2024-07-29 12:12:08,122 - train: 133623 data points
|
40 |
+
2024-07-29 12:12:08,122 - dev: 19090 data points
|
41 |
+
2024-07-29 12:12:08,122 - test: 38179 data points
|
42 |
+
2024-07-29 12:12:08,122 ----------------------------------------------------------------------------------------------------
|
43 |
+
2024-07-29 12:12:08,122 EPOCH 1
|
44 |
+
2024-07-29 12:12:52,504 batch 104/1044 - loss 6.55590570 - lr 0.0010 - time 44.38s
|
45 |
+
2024-07-29 12:13:37,946 batch 208/1044 - loss 6.22632505 - lr 0.0010 - time 89.82s
|
46 |
+
2024-07-29 12:14:22,173 batch 312/1044 - loss 6.00409512 - lr 0.0010 - time 134.05s
|
47 |
+
2024-07-29 12:15:05,473 batch 416/1044 - loss 5.82860130 - lr 0.0010 - time 177.35s
|
48 |
+
2024-07-29 12:15:53,882 batch 520/1044 - loss 5.67626224 - lr 0.0010 - time 225.76s
|
49 |
+
2024-07-29 12:16:38,396 batch 624/1044 - loss 5.55041786 - lr 0.0010 - time 270.27s
|
50 |
+
2024-07-29 12:17:21,976 batch 728/1044 - loss 5.43503429 - lr 0.0010 - time 313.85s
|
51 |
+
2024-07-29 12:18:08,257 batch 832/1044 - loss 5.33554016 - lr 0.0010 - time 360.13s
|
52 |
+
2024-07-29 12:18:53,099 batch 936/1044 - loss 5.24639468 - lr 0.0010 - time 404.98s
|
53 |
+
2024-07-29 12:19:38,001 batch 1040/1044 - loss 5.16604180 - lr 0.0010 - time 449.88s
|
54 |
+
2024-07-29 12:19:40,054 ----------------------------------------------------------------------------------------------------
|
55 |
+
2024-07-29 12:19:40,055 EPOCH 1 DONE
|
56 |
+
2024-07-29 12:19:49,946 TRAIN Loss: 5.1631
|
57 |
+
2024-07-29 12:19:49,946 DEV Loss: 5.6784
|
58 |
+
2024-07-29 12:19:49,946 DEV Perplexity: 292.4926
|
59 |
+
2024-07-29 12:19:49,946 New best score!
|
60 |
+
2024-07-29 12:19:49,947 ----------------------------------------------------------------------------------------------------
|
61 |
+
2024-07-29 12:19:49,947 EPOCH 2
|
62 |
+
2024-07-29 12:20:37,392 batch 104/1044 - loss 4.28788665 - lr 0.0010 - time 47.44s
|
63 |
+
2024-07-29 12:21:23,048 batch 208/1044 - loss 4.26111834 - lr 0.0010 - time 93.10s
|
64 |
+
2024-07-29 12:22:03,080 batch 312/1044 - loss 4.23521801 - lr 0.0010 - time 133.13s
|
65 |
+
2024-07-29 12:22:48,437 batch 416/1044 - loss 4.22231107 - lr 0.0010 - time 178.49s
|
66 |
+
2024-07-29 12:23:33,903 batch 520/1044 - loss 4.19854044 - lr 0.0010 - time 223.96s
|
67 |
+
2024-07-29 12:24:19,398 batch 624/1044 - loss 4.18936884 - lr 0.0010 - time 269.45s
|
68 |
+
2024-07-29 12:25:02,624 batch 728/1044 - loss 4.17078320 - lr 0.0010 - time 312.68s
|
69 |
+
2024-07-29 12:25:47,352 batch 832/1044 - loss 4.15500766 - lr 0.0010 - time 357.41s
|
70 |
+
2024-07-29 12:26:33,400 batch 936/1044 - loss 4.13493889 - lr 0.0010 - time 403.45s
|
71 |
+
2024-07-29 12:27:16,895 batch 1040/1044 - loss 4.12070917 - lr 0.0010 - time 446.95s
|
72 |
+
2024-07-29 12:27:18,218 ----------------------------------------------------------------------------------------------------
|
73 |
+
2024-07-29 12:27:18,219 EPOCH 2 DONE
|
74 |
+
2024-07-29 12:27:28,202 TRAIN Loss: 4.1205
|
75 |
+
2024-07-29 12:27:28,202 DEV Loss: 5.4640
|
76 |
+
2024-07-29 12:27:28,202 DEV Perplexity: 236.0356
|
77 |
+
2024-07-29 12:27:28,202 New best score!
|
78 |
+
2024-07-29 12:27:28,204 ----------------------------------------------------------------------------------------------------
|
79 |
+
2024-07-29 12:27:28,204 EPOCH 3
|
80 |
+
2024-07-29 12:28:15,493 batch 104/1044 - loss 3.82035114 - lr 0.0010 - time 47.29s
|
81 |
+
2024-07-29 12:28:59,217 batch 208/1044 - loss 3.83381896 - lr 0.0010 - time 91.01s
|
82 |
+
2024-07-29 12:29:43,775 batch 312/1044 - loss 3.82385287 - lr 0.0010 - time 135.57s
|
83 |
+
2024-07-29 12:30:28,818 batch 416/1044 - loss 3.81582692 - lr 0.0010 - time 180.61s
|
84 |
+
2024-07-29 12:31:14,705 batch 520/1044 - loss 3.80935454 - lr 0.0010 - time 226.50s
|
85 |
+
2024-07-29 12:31:58,853 batch 624/1044 - loss 3.80629152 - lr 0.0010 - time 270.65s
|
86 |
+
2024-07-29 12:32:44,045 batch 728/1044 - loss 3.79856095 - lr 0.0010 - time 315.84s
|
87 |
+
2024-07-29 12:33:27,294 batch 832/1044 - loss 3.79240243 - lr 0.0010 - time 359.09s
|
88 |
+
2024-07-29 12:34:13,749 batch 936/1044 - loss 3.78315422 - lr 0.0010 - time 405.55s
|
89 |
+
2024-07-29 12:34:57,878 batch 1040/1044 - loss 3.77489387 - lr 0.0010 - time 449.67s
|
90 |
+
2024-07-29 12:34:59,772 ----------------------------------------------------------------------------------------------------
|
91 |
+
2024-07-29 12:34:59,774 EPOCH 3 DONE
|
92 |
+
2024-07-29 12:35:09,586 TRAIN Loss: 3.7744
|
93 |
+
2024-07-29 12:35:09,586 DEV Loss: 5.5166
|
94 |
+
2024-07-29 12:35:09,586 DEV Perplexity: 248.7894
|
95 |
+
2024-07-29 12:35:09,586 No improvement for 1 epoch(s)
|
96 |
+
2024-07-29 12:35:09,586 ----------------------------------------------------------------------------------------------------
|
97 |
+
2024-07-29 12:35:09,586 EPOCH 4
|
98 |
+
2024-07-29 12:35:55,319 batch 104/1044 - loss 3.57704538 - lr 0.0010 - time 45.73s
|
99 |
+
2024-07-29 12:36:38,647 batch 208/1044 - loss 3.58741621 - lr 0.0010 - time 89.06s
|
100 |
+
2024-07-29 12:37:25,244 batch 312/1044 - loss 3.59462038 - lr 0.0010 - time 135.66s
|
101 |
+
2024-07-29 12:38:07,593 batch 416/1044 - loss 3.59290513 - lr 0.0010 - time 178.01s
|
102 |
+
2024-07-29 12:38:53,489 batch 520/1044 - loss 3.59325143 - lr 0.0010 - time 223.90s
|
103 |
+
2024-07-29 12:39:38,227 batch 624/1044 - loss 3.59581574 - lr 0.0010 - time 268.64s
|
104 |
+
2024-07-29 12:40:21,395 batch 728/1044 - loss 3.59330660 - lr 0.0010 - time 311.81s
|
105 |
+
2024-07-29 12:41:06,582 batch 832/1044 - loss 3.59253718 - lr 0.0010 - time 357.00s
|
106 |
+
2024-07-29 12:41:54,507 batch 936/1044 - loss 3.59168349 - lr 0.0010 - time 404.92s
|
107 |
+
2024-07-29 12:42:37,705 batch 1040/1044 - loss 3.59140670 - lr 0.0010 - time 448.12s
|
108 |
+
2024-07-29 12:42:39,176 ----------------------------------------------------------------------------------------------------
|
109 |
+
2024-07-29 12:42:39,177 EPOCH 4 DONE
|
110 |
+
2024-07-29 12:42:48,957 TRAIN Loss: 3.5911
|
111 |
+
2024-07-29 12:42:48,958 DEV Loss: 5.5211
|
112 |
+
2024-07-29 12:42:48,958 DEV Perplexity: 249.9211
|
113 |
+
2024-07-29 12:42:48,958 No improvement for 2 epoch(s)
|
114 |
+
2024-07-29 12:42:48,958 ----------------------------------------------------------------------------------------------------
|
115 |
+
2024-07-29 12:42:48,958 EPOCH 5
|
116 |
+
2024-07-29 12:43:31,233 batch 104/1044 - loss 3.43450736 - lr 0.0010 - time 42.27s
|
117 |
+
2024-07-29 12:44:18,916 batch 208/1044 - loss 3.44922357 - lr 0.0010 - time 89.96s
|
118 |
+
2024-07-29 12:45:02,751 batch 312/1044 - loss 3.45619388 - lr 0.0010 - time 133.79s
|
119 |
+
2024-07-29 12:45:45,808 batch 416/1044 - loss 3.46356326 - lr 0.0010 - time 176.85s
|
120 |
+
2024-07-29 12:46:30,911 batch 520/1044 - loss 3.47369989 - lr 0.0010 - time 221.95s
|
121 |
+
2024-07-29 12:47:13,690 batch 624/1044 - loss 3.47065963 - lr 0.0010 - time 264.73s
|
122 |
+
2024-07-29 12:48:00,201 batch 728/1044 - loss 3.47413705 - lr 0.0010 - time 311.24s
|
123 |
+
2024-07-29 12:48:48,151 batch 832/1044 - loss 3.46989174 - lr 0.0010 - time 359.19s
|
124 |
+
2024-07-29 12:49:31,752 batch 936/1044 - loss 3.46717992 - lr 0.0010 - time 402.79s
|
125 |
+
2024-07-29 12:50:15,426 batch 1040/1044 - loss 3.47124956 - lr 0.0010 - time 446.47s
|
126 |
+
2024-07-29 12:50:17,412 ----------------------------------------------------------------------------------------------------
|
127 |
+
2024-07-29 12:50:17,414 EPOCH 5 DONE
|
128 |
+
2024-07-29 12:50:27,169 TRAIN Loss: 3.4710
|
129 |
+
2024-07-29 12:50:27,170 DEV Loss: 5.4003
|
130 |
+
2024-07-29 12:50:27,170 DEV Perplexity: 221.4734
|
131 |
+
2024-07-29 12:50:27,170 New best score!
|
132 |
+
2024-07-29 12:50:27,171 ----------------------------------------------------------------------------------------------------
|
133 |
+
2024-07-29 12:50:27,171 EPOCH 6
|
134 |
+
2024-07-29 12:51:09,349 batch 104/1044 - loss 3.33496981 - lr 0.0010 - time 42.18s
|
135 |
+
2024-07-29 12:51:54,647 batch 208/1044 - loss 3.32858963 - lr 0.0010 - time 87.48s
|
136 |
+
2024-07-29 12:52:37,276 batch 312/1044 - loss 3.33771896 - lr 0.0010 - time 130.10s
|
137 |
+
2024-07-29 12:53:21,503 batch 416/1044 - loss 3.35001282 - lr 0.0010 - time 174.33s
|
138 |
+
2024-07-29 12:54:09,458 batch 520/1044 - loss 3.34545176 - lr 0.0010 - time 222.29s
|
139 |
+
2024-07-29 12:54:56,424 batch 624/1044 - loss 3.35168742 - lr 0.0010 - time 269.25s
|
140 |
+
2024-07-29 12:55:39,905 batch 728/1044 - loss 3.35706329 - lr 0.0010 - time 312.73s
|
141 |
+
2024-07-29 12:56:25,200 batch 832/1044 - loss 3.36277576 - lr 0.0010 - time 358.03s
|
142 |
+
2024-07-29 12:57:10,413 batch 936/1044 - loss 3.36653246 - lr 0.0010 - time 403.24s
|
143 |
+
2024-07-29 12:57:55,633 batch 1040/1044 - loss 3.36651458 - lr 0.0010 - time 448.46s
|
144 |
+
2024-07-29 12:57:57,482 ----------------------------------------------------------------------------------------------------
|
145 |
+
2024-07-29 12:57:57,483 EPOCH 6 DONE
|
146 |
+
2024-07-29 12:58:07,292 TRAIN Loss: 3.3667
|
147 |
+
2024-07-29 12:58:07,292 DEV Loss: 5.4617
|
148 |
+
2024-07-29 12:58:07,292 DEV Perplexity: 235.4981
|
149 |
+
2024-07-29 12:58:07,292 No improvement for 1 epoch(s)
|
150 |
+
2024-07-29 12:58:07,292 ----------------------------------------------------------------------------------------------------
|
151 |
+
2024-07-29 12:58:07,292 EPOCH 7
|
152 |
+
2024-07-29 12:58:50,690 batch 104/1044 - loss 3.23634231 - lr 0.0010 - time 43.40s
|
153 |
+
2024-07-29 12:59:36,713 batch 208/1044 - loss 3.23920326 - lr 0.0010 - time 89.42s
|
154 |
+
2024-07-29 13:00:22,148 batch 312/1044 - loss 3.23943090 - lr 0.0010 - time 134.86s
|
155 |
+
2024-07-29 13:01:05,039 batch 416/1044 - loss 3.24726985 - lr 0.0010 - time 177.75s
|
156 |
+
2024-07-29 13:01:49,884 batch 520/1044 - loss 3.25340349 - lr 0.0010 - time 222.59s
|
157 |
+
2024-07-29 13:02:34,159 batch 624/1044 - loss 3.25818986 - lr 0.0010 - time 266.87s
|
158 |
+
2024-07-29 13:03:20,346 batch 728/1044 - loss 3.26475725 - lr 0.0010 - time 313.05s
|
159 |
+
2024-07-29 13:04:04,767 batch 832/1044 - loss 3.26531474 - lr 0.0010 - time 357.47s
|
160 |
+
2024-07-29 13:04:49,324 batch 936/1044 - loss 3.27283444 - lr 0.0010 - time 402.03s
|
161 |
+
2024-07-29 13:05:34,031 batch 1040/1044 - loss 3.27731857 - lr 0.0010 - time 446.74s
|
162 |
+
2024-07-29 13:05:35,672 ----------------------------------------------------------------------------------------------------
|
163 |
+
2024-07-29 13:05:35,674 EPOCH 7 DONE
|
164 |
+
2024-07-29 13:05:45,570 TRAIN Loss: 3.2776
|
165 |
+
2024-07-29 13:05:45,570 DEV Loss: 5.4233
|
166 |
+
2024-07-29 13:05:45,570 DEV Perplexity: 226.6176
|
167 |
+
2024-07-29 13:05:45,570 No improvement for 2 epoch(s)
|
168 |
+
2024-07-29 13:05:45,570 ----------------------------------------------------------------------------------------------------
|
169 |
+
2024-07-29 13:05:45,570 EPOCH 8
|
170 |
+
2024-07-29 13:06:30,788 batch 104/1044 - loss 3.14689155 - lr 0.0010 - time 45.22s
|
171 |
+
2024-07-29 13:07:16,698 batch 208/1044 - loss 3.16702358 - lr 0.0010 - time 91.13s
|
172 |
+
2024-07-29 13:08:02,627 batch 312/1044 - loss 3.17593768 - lr 0.0010 - time 137.06s
|
173 |
+
2024-07-29 13:08:46,341 batch 416/1044 - loss 3.18920171 - lr 0.0010 - time 180.77s
|
174 |
+
2024-07-29 13:09:31,427 batch 520/1044 - loss 3.19404678 - lr 0.0010 - time 225.86s
|
175 |
+
2024-07-29 13:10:17,560 batch 624/1044 - loss 3.19597690 - lr 0.0010 - time 271.99s
|
176 |
+
2024-07-29 13:11:02,396 batch 728/1044 - loss 3.20055166 - lr 0.0010 - time 316.83s
|
177 |
+
2024-07-29 13:11:45,174 batch 832/1044 - loss 3.20173663 - lr 0.0010 - time 359.60s
|
178 |
+
2024-07-29 13:12:28,915 batch 936/1044 - loss 3.20539983 - lr 0.0010 - time 403.35s
|
179 |
+
2024-07-29 13:13:13,435 batch 1040/1044 - loss 3.20908286 - lr 0.0010 - time 447.86s
|
180 |
+
2024-07-29 13:13:15,049 ----------------------------------------------------------------------------------------------------
|
181 |
+
2024-07-29 13:13:15,050 EPOCH 8 DONE
|
182 |
+
2024-07-29 13:13:24,914 TRAIN Loss: 3.2084
|
183 |
+
2024-07-29 13:13:24,915 DEV Loss: 5.5271
|
184 |
+
2024-07-29 13:13:24,915 DEV Perplexity: 251.4024
|
185 |
+
2024-07-29 13:13:24,915 No improvement for 3 epoch(s)
|
186 |
+
2024-07-29 13:13:24,915 ----------------------------------------------------------------------------------------------------
|
187 |
+
2024-07-29 13:13:24,915 EPOCH 9
|
188 |
+
2024-07-29 13:14:09,534 batch 104/1044 - loss 3.07291587 - lr 0.0010 - time 44.62s
|
189 |
+
2024-07-29 13:14:56,545 batch 208/1044 - loss 3.08707912 - lr 0.0010 - time 91.63s
|
190 |
+
2024-07-29 13:15:40,957 batch 312/1044 - loss 3.11552258 - lr 0.0010 - time 136.04s
|
191 |
+
2024-07-29 13:16:23,759 batch 416/1044 - loss 3.12117406 - lr 0.0010 - time 178.84s
|
192 |
+
2024-07-29 13:17:09,590 batch 520/1044 - loss 3.12517969 - lr 0.0010 - time 224.68s
|
193 |
+
2024-07-29 13:17:55,193 batch 624/1044 - loss 3.13141782 - lr 0.0010 - time 270.28s
|
194 |
+
2024-07-29 13:18:39,558 batch 728/1044 - loss 3.14053547 - lr 0.0010 - time 314.64s
|
195 |
+
2024-07-29 13:19:24,852 batch 832/1044 - loss 3.14332676 - lr 0.0010 - time 359.94s
|
196 |
+
2024-07-29 13:20:08,385 batch 936/1044 - loss 3.14388524 - lr 0.0010 - time 403.47s
|
197 |
+
2024-07-29 13:20:52,830 batch 1040/1044 - loss 3.14750358 - lr 0.0010 - time 447.91s
|
198 |
+
2024-07-29 13:20:54,425 ----------------------------------------------------------------------------------------------------
|
199 |
+
2024-07-29 13:20:54,427 EPOCH 9 DONE
|
200 |
+
2024-07-29 13:21:04,051 TRAIN Loss: 3.1471
|
201 |
+
2024-07-29 13:21:04,051 DEV Loss: 5.6439
|
202 |
+
2024-07-29 13:21:04,051 DEV Perplexity: 282.5750
|
203 |
+
2024-07-29 13:21:04,051 No improvement for 4 epoch(s)
|
204 |
+
2024-07-29 13:21:04,051 ----------------------------------------------------------------------------------------------------
|
205 |
+
2024-07-29 13:21:04,051 EPOCH 10
|
206 |
+
2024-07-29 13:21:47,142 batch 104/1044 - loss 3.01199039 - lr 0.0001 - time 43.09s
|
207 |
+
2024-07-29 13:22:33,458 batch 208/1044 - loss 3.00634472 - lr 0.0001 - time 89.41s
|
208 |
+
2024-07-29 13:23:16,873 batch 312/1044 - loss 2.99168187 - lr 0.0001 - time 132.82s
|
209 |
+
2024-07-29 13:24:03,708 batch 416/1044 - loss 2.98407727 - lr 0.0001 - time 179.66s
|
210 |
+
2024-07-29 13:24:52,162 batch 520/1044 - loss 2.97643183 - lr 0.0001 - time 228.11s
|
211 |
+
2024-07-29 13:25:34,213 batch 624/1044 - loss 2.96662284 - lr 0.0001 - time 270.16s
|
212 |
+
2024-07-29 13:26:21,081 batch 728/1044 - loss 2.96243565 - lr 0.0001 - time 317.03s
|
213 |
+
2024-07-29 13:27:05,039 batch 832/1044 - loss 2.95927654 - lr 0.0001 - time 360.99s
|
214 |
+
2024-07-29 13:27:51,029 batch 936/1044 - loss 2.95770299 - lr 0.0001 - time 406.98s
|
215 |
+
2024-07-29 13:28:35,096 batch 1040/1044 - loss 2.95871426 - lr 0.0001 - time 451.04s
|
216 |
+
2024-07-29 13:28:36,626 ----------------------------------------------------------------------------------------------------
|
217 |
+
2024-07-29 13:28:36,628 EPOCH 10 DONE
|
218 |
+
2024-07-29 13:28:46,529 TRAIN Loss: 2.9588
|
219 |
+
2024-07-29 13:28:46,530 DEV Loss: 5.5312
|
220 |
+
2024-07-29 13:28:46,530 DEV Perplexity: 252.4576
|
221 |
+
2024-07-29 13:28:46,530 No improvement for 5 epoch(s)
|
222 |
+
2024-07-29 13:28:46,530 Patience reached: Terminating model training due to early stopping
|
223 |
+
2024-07-29 13:28:46,530 ----------------------------------------------------------------------------------------------------
|
224 |
+
2024-07-29 13:28:46,530 Finished Training
|
225 |
+
2024-07-29 13:29:05,781 TEST Perplexity: 222.3681
|
226 |
+
2024-07-29 13:39:02,744 TEST BLEU = 21.98 82.0/54.5/14.9/3.5 (BP = 1.000 ratio = 1.000 hyp_len = 89 ref_len = 89)
|
models/en2de/word_word2vec_embeddings_with_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:194c263c7b1faeba7282be66b75ea1b383bc3a07a9a487fd2a5fb96bf1e20442
|
3 |
+
size 107988520
|
models/en2de/word_word2vec_embeddings_without_attention/log.txt
ADDED
File without changes
|
models/en2de/word_word2vec_embeddings_without_attention/model.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85e934fcb656d58ff61ced0984536560c49ce03a3749f234a561a9d829da0dd5
|
3 |
+
size 174378292
|