abhisheksan committed on
Commit f4f946e
Parent: 2442c76

Add model configuration and improve model initialization in ModelManager

Files changed (2)
  1. logs/poetry_generation.log +170 -0
  2. main.py +20 -6
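
The log below records the failure this commit fixes: the previous code instantiated a stock GPT2LMHeadModel.from_pretrained('gpt2') (12 layers, 768-dim embeddings, 1024 positions) and then ran a strict load_state_dict against a checkpoint trained with a much smaller configuration (6 layers, 384-dim embeddings, 128 positions) whose lm_head had been quantized. Condensed into a standalone sketch, the loading path after this change looks roughly like the snippet below; the config values are taken from the diff, while the surrounding scaffolding (paths, prints) is illustrative rather than part of the commit.

import os
import torch
from transformers import GPT2Config, GPT2LMHeadModel, GPT2Tokenizer

MODEL_PATH = os.path.join("./models/", "poeticagpt.pth")

# Rebuild the architecture that was actually trained: 6 layers, 384-dim
# embeddings, 6 heads, 128-token context (values mirrored from the diff below).
config = GPT2Config(
    n_positions=128,
    n_ctx=128,
    n_embd=384,
    n_layer=6,
    n_head=6,
    vocab_size=50257,
    bos_token_id=50256,
    eos_token_id=50256,
)

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

model = GPT2LMHeadModel(config)              # fresh model with the right shapes
state_dict = torch.load(MODEL_PATH, map_location="cpu")

# strict=False tolerates the quantized lm_head entries in the checkpoint and
# the tied lm_head.weight that the checkpoint does not carry.
missing, unexpected = model.load_state_dict(state_dict, strict=False)
print("missing:", missing)
print("unexpected:", unexpected)

model.to("cpu").eval()
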
logs/poetry_generation.log CHANGED
@@ -105,3 +105,173 @@ OSError: Unable to load weights from pytorch checkpoint file for './models/pytor
105
  2024-11-16 23:35:18,815 - main - ERROR - Failed to initialize model manager
106
  2024-11-16 23:37:05,649 - main - INFO - Loading tokenizer...
107
  2024-11-16 23:37:06,372 - main - INFO - Loading model...
108
+ 2024-11-16 23:40:15,280 - main - ERROR - Error initializing model: Error(s) in loading state_dict for GPT2LMHeadModel:
109
+ Missing key(s) in state_dict: "transformer.h.6.ln_1.weight", "transformer.h.6.ln_1.bias", "transformer.h.6.attn.c_attn.weight", "transformer.h.6.attn.c_attn.bias", "transformer.h.6.attn.c_proj.weight", "transformer.h.6.attn.c_proj.bias", "transformer.h.6.ln_2.weight", "transformer.h.6.ln_2.bias", "transformer.h.6.mlp.c_fc.weight", "transformer.h.6.mlp.c_fc.bias", "transformer.h.6.mlp.c_proj.weight", "transformer.h.6.mlp.c_proj.bias", "transformer.h.7.ln_1.weight", "transformer.h.7.ln_1.bias", "transformer.h.7.attn.c_attn.weight", "transformer.h.7.attn.c_attn.bias", "transformer.h.7.attn.c_proj.weight", "transformer.h.7.attn.c_proj.bias", "transformer.h.7.ln_2.weight", "transformer.h.7.ln_2.bias", "transformer.h.7.mlp.c_fc.weight", "transformer.h.7.mlp.c_fc.bias", "transformer.h.7.mlp.c_proj.weight", "transformer.h.7.mlp.c_proj.bias", "transformer.h.8.ln_1.weight", "transformer.h.8.ln_1.bias", "transformer.h.8.attn.c_attn.weight", "transformer.h.8.attn.c_attn.bias", "transformer.h.8.attn.c_proj.weight", "transformer.h.8.attn.c_proj.bias", "transformer.h.8.ln_2.weight", "transformer.h.8.ln_2.bias", "transformer.h.8.mlp.c_fc.weight", "transformer.h.8.mlp.c_fc.bias", "transformer.h.8.mlp.c_proj.weight", "transformer.h.8.mlp.c_proj.bias", "transformer.h.9.ln_1.weight", "transformer.h.9.ln_1.bias", "transformer.h.9.attn.c_attn.weight", "transformer.h.9.attn.c_attn.bias", "transformer.h.9.attn.c_proj.weight", "transformer.h.9.attn.c_proj.bias", "transformer.h.9.ln_2.weight", "transformer.h.9.ln_2.bias", "transformer.h.9.mlp.c_fc.weight", "transformer.h.9.mlp.c_fc.bias", "transformer.h.9.mlp.c_proj.weight", "transformer.h.9.mlp.c_proj.bias", "transformer.h.10.ln_1.weight", "transformer.h.10.ln_1.bias", "transformer.h.10.attn.c_attn.weight", "transformer.h.10.attn.c_attn.bias", "transformer.h.10.attn.c_proj.weight", "transformer.h.10.attn.c_proj.bias", "transformer.h.10.ln_2.weight", "transformer.h.10.ln_2.bias", "transformer.h.10.mlp.c_fc.weight", "transformer.h.10.mlp.c_fc.bias", "transformer.h.10.mlp.c_proj.weight", "transformer.h.10.mlp.c_proj.bias", "transformer.h.11.ln_1.weight", "transformer.h.11.ln_1.bias", "transformer.h.11.attn.c_attn.weight", "transformer.h.11.attn.c_attn.bias", "transformer.h.11.attn.c_proj.weight", "transformer.h.11.attn.c_proj.bias", "transformer.h.11.ln_2.weight", "transformer.h.11.ln_2.bias", "transformer.h.11.mlp.c_fc.weight", "transformer.h.11.mlp.c_fc.bias", "transformer.h.11.mlp.c_proj.weight", "transformer.h.11.mlp.c_proj.bias", "lm_head.weight".
110
+ Unexpected key(s) in state_dict: "lm_head.scale", "lm_head.zero_point", "lm_head._packed_params.dtype", "lm_head._packed_params._packed_params".
111
+ size mismatch for transformer.wte.weight: copying a param with shape torch.Size([50257, 384]) from checkpoint, the shape in current model is torch.Size([50257, 768]).
112
+ size mismatch for transformer.wpe.weight: copying a param with shape torch.Size([128, 384]) from checkpoint, the shape in current model is torch.Size([1024, 768]).
113
+ size mismatch for transformer.h.0.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
114
+ size mismatch for transformer.h.0.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
115
+ size mismatch for transformer.h.0.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
116
+ size mismatch for transformer.h.0.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
117
+ size mismatch for transformer.h.0.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
118
+ size mismatch for transformer.h.0.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
119
+ size mismatch for transformer.h.0.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
120
+ size mismatch for transformer.h.0.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
121
+ size mismatch for transformer.h.0.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
122
+ size mismatch for transformer.h.0.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
123
+ size mismatch for transformer.h.0.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
124
+ size mismatch for transformer.h.0.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
125
+ size mismatch for transformer.h.1.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
126
+ size mismatch for transformer.h.1.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
127
+ size mismatch for transformer.h.1.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
128
+ size mismatch for transformer.h.1.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
129
+ size mismatch for transformer.h.1.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
130
+ size mismatch for transformer.h.1.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
131
+ size mismatch for transformer.h.1.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
132
+ size mismatch for transformer.h.1.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
133
+ size mismatch for transformer.h.1.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
134
+ size mismatch for transformer.h.1.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
135
+ size mismatch for transformer.h.1.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
136
+ size mismatch for transformer.h.1.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
137
+ size mismatch for transformer.h.2.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
138
+ size mismatch for transformer.h.2.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
139
+ size mismatch for transformer.h.2.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
140
+ size mismatch for transformer.h.2.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
141
+ size mismatch for transformer.h.2.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
142
+ size mismatch for transformer.h.2.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
143
+ size mismatch for transformer.h.2.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
144
+ size mismatch for transformer.h.2.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
145
+ size mismatch for transformer.h.2.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
146
+ size mismatch for transformer.h.2.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
147
+ size mismatch for transformer.h.2.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
148
+ size mismatch for transformer.h.2.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
149
+ size mismatch for transformer.h.3.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
150
+ size mismatch for transformer.h.3.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
151
+ size mismatch for transformer.h.3.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
152
+ size mismatch for transformer.h.3.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
153
+ size mismatch for transformer.h.3.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
154
+ size mismatch for transformer.h.3.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
155
+ size mismatch for transformer.h.3.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
156
+ size mismatch for transformer.h.3.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
157
+ size mismatch for transformer.h.3.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
158
+ size mismatch for transformer.h.3.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
159
+ size mismatch for transformer.h.3.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
160
+ size mismatch for transformer.h.3.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
161
+ size mismatch for transformer.h.4.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
162
+ size mismatch for transformer.h.4.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
163
+ size mismatch for transformer.h.4.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
164
+ size mismatch for transformer.h.4.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
165
+ size mismatch for transformer.h.4.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
166
+ size mismatch for transformer.h.4.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
167
+ size mismatch for transformer.h.4.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
168
+ size mismatch for transformer.h.4.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
169
+ size mismatch for transformer.h.4.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
170
+ size mismatch for transformer.h.4.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
171
+ size mismatch for transformer.h.4.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
172
+ size mismatch for transformer.h.4.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
173
+ size mismatch for transformer.h.5.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
174
+ size mismatch for transformer.h.5.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
175
+ size mismatch for transformer.h.5.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
176
+ size mismatch for transformer.h.5.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
177
+ size mismatch for transformer.h.5.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
178
+ size mismatch for transformer.h.5.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
179
+ size mismatch for transformer.h.5.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
180
+ size mismatch for transformer.h.5.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
181
+ size mismatch for transformer.h.5.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
182
+ size mismatch for transformer.h.5.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
183
+ size mismatch for transformer.h.5.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
184
+ size mismatch for transformer.h.5.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
185
+ size mismatch for transformer.ln_f.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
186
+ size mismatch for transformer.ln_f.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
187
+ 2024-11-16 23:40:15,283 - main - ERROR - Detailed traceback:
188
+ Traceback (most recent call last):
189
+ File "E:\Self Work\My Projects\Poetica HuggingFace Server\poetica\main.py", line 74, in initialize
190
+ self.model.load_state_dict(state_dict)
191
+ File "e:\Self Work\My Projects\Poetica HuggingFace Server\.venv\Lib\site-packages\torch\nn\modules\module.py", line 2189, in load_state_dict
192
+ raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
193
+ RuntimeError: Error(s) in loading state_dict for GPT2LMHeadModel:
194
+ Missing key(s) in state_dict: "transformer.h.6.ln_1.weight", "transformer.h.6.ln_1.bias", "transformer.h.6.attn.c_attn.weight", "transformer.h.6.attn.c_attn.bias", "transformer.h.6.attn.c_proj.weight", "transformer.h.6.attn.c_proj.bias", "transformer.h.6.ln_2.weight", "transformer.h.6.ln_2.bias", "transformer.h.6.mlp.c_fc.weight", "transformer.h.6.mlp.c_fc.bias", "transformer.h.6.mlp.c_proj.weight", "transformer.h.6.mlp.c_proj.bias", "transformer.h.7.ln_1.weight", "transformer.h.7.ln_1.bias", "transformer.h.7.attn.c_attn.weight", "transformer.h.7.attn.c_attn.bias", "transformer.h.7.attn.c_proj.weight", "transformer.h.7.attn.c_proj.bias", "transformer.h.7.ln_2.weight", "transformer.h.7.ln_2.bias", "transformer.h.7.mlp.c_fc.weight", "transformer.h.7.mlp.c_fc.bias", "transformer.h.7.mlp.c_proj.weight", "transformer.h.7.mlp.c_proj.bias", "transformer.h.8.ln_1.weight", "transformer.h.8.ln_1.bias", "transformer.h.8.attn.c_attn.weight", "transformer.h.8.attn.c_attn.bias", "transformer.h.8.attn.c_proj.weight", "transformer.h.8.attn.c_proj.bias", "transformer.h.8.ln_2.weight", "transformer.h.8.ln_2.bias", "transformer.h.8.mlp.c_fc.weight", "transformer.h.8.mlp.c_fc.bias", "transformer.h.8.mlp.c_proj.weight", "transformer.h.8.mlp.c_proj.bias", "transformer.h.9.ln_1.weight", "transformer.h.9.ln_1.bias", "transformer.h.9.attn.c_attn.weight", "transformer.h.9.attn.c_attn.bias", "transformer.h.9.attn.c_proj.weight", "transformer.h.9.attn.c_proj.bias", "transformer.h.9.ln_2.weight", "transformer.h.9.ln_2.bias", "transformer.h.9.mlp.c_fc.weight", "transformer.h.9.mlp.c_fc.bias", "transformer.h.9.mlp.c_proj.weight", "transformer.h.9.mlp.c_proj.bias", "transformer.h.10.ln_1.weight", "transformer.h.10.ln_1.bias", "transformer.h.10.attn.c_attn.weight", "transformer.h.10.attn.c_attn.bias", "transformer.h.10.attn.c_proj.weight", "transformer.h.10.attn.c_proj.bias", "transformer.h.10.ln_2.weight", "transformer.h.10.ln_2.bias", "transformer.h.10.mlp.c_fc.weight", "transformer.h.10.mlp.c_fc.bias", "transformer.h.10.mlp.c_proj.weight", "transformer.h.10.mlp.c_proj.bias", "transformer.h.11.ln_1.weight", "transformer.h.11.ln_1.bias", "transformer.h.11.attn.c_attn.weight", "transformer.h.11.attn.c_attn.bias", "transformer.h.11.attn.c_proj.weight", "transformer.h.11.attn.c_proj.bias", "transformer.h.11.ln_2.weight", "transformer.h.11.ln_2.bias", "transformer.h.11.mlp.c_fc.weight", "transformer.h.11.mlp.c_fc.bias", "transformer.h.11.mlp.c_proj.weight", "transformer.h.11.mlp.c_proj.bias", "lm_head.weight".
195
+ Unexpected key(s) in state_dict: "lm_head.scale", "lm_head.zero_point", "lm_head._packed_params.dtype", "lm_head._packed_params._packed_params".
196
+ size mismatch for transformer.wte.weight: copying a param with shape torch.Size([50257, 384]) from checkpoint, the shape in current model is torch.Size([50257, 768]).
197
+ size mismatch for transformer.wpe.weight: copying a param with shape torch.Size([128, 384]) from checkpoint, the shape in current model is torch.Size([1024, 768]).
198
+ size mismatch for transformer.h.0.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
199
+ size mismatch for transformer.h.0.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
200
+ size mismatch for transformer.h.0.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
201
+ size mismatch for transformer.h.0.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
202
+ size mismatch for transformer.h.0.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
203
+ size mismatch for transformer.h.0.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
204
+ size mismatch for transformer.h.0.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
205
+ size mismatch for transformer.h.0.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
206
+ size mismatch for transformer.h.0.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
207
+ size mismatch for transformer.h.0.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
208
+ size mismatch for transformer.h.0.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
209
+ size mismatch for transformer.h.0.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
210
+ size mismatch for transformer.h.1.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
211
+ size mismatch for transformer.h.1.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
212
+ size mismatch for transformer.h.1.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
213
+ size mismatch for transformer.h.1.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
214
+ size mismatch for transformer.h.1.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
215
+ size mismatch for transformer.h.1.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
216
+ size mismatch for transformer.h.1.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
217
+ size mismatch for transformer.h.1.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
218
+ size mismatch for transformer.h.1.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
219
+ size mismatch for transformer.h.1.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
220
+ size mismatch for transformer.h.1.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
221
+ size mismatch for transformer.h.1.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
222
+ size mismatch for transformer.h.2.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
223
+ size mismatch for transformer.h.2.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
224
+ size mismatch for transformer.h.2.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
225
+ size mismatch for transformer.h.2.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
226
+ size mismatch for transformer.h.2.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
227
+ size mismatch for transformer.h.2.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
228
+ size mismatch for transformer.h.2.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
229
+ size mismatch for transformer.h.2.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
230
+ size mismatch for transformer.h.2.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
231
+ size mismatch for transformer.h.2.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
232
+ size mismatch for transformer.h.2.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
233
+ size mismatch for transformer.h.2.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
234
+ size mismatch for transformer.h.3.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
235
+ size mismatch for transformer.h.3.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
236
+ size mismatch for transformer.h.3.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
237
+ size mismatch for transformer.h.3.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
238
+ size mismatch for transformer.h.3.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
239
+ size mismatch for transformer.h.3.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
240
+ size mismatch for transformer.h.3.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
241
+ size mismatch for transformer.h.3.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
242
+ size mismatch for transformer.h.3.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
243
+ size mismatch for transformer.h.3.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
244
+ size mismatch for transformer.h.3.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
245
+ size mismatch for transformer.h.3.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
246
+ size mismatch for transformer.h.4.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
247
+ size mismatch for transformer.h.4.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
248
+ size mismatch for transformer.h.4.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
249
+ size mismatch for transformer.h.4.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
250
+ size mismatch for transformer.h.4.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
251
+ size mismatch for transformer.h.4.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
252
+ size mismatch for transformer.h.4.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
253
+ size mismatch for transformer.h.4.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
254
+ size mismatch for transformer.h.4.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
255
+ size mismatch for transformer.h.4.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
256
+ size mismatch for transformer.h.4.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
257
+ size mismatch for transformer.h.4.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
258
+ size mismatch for transformer.h.5.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
259
+ size mismatch for transformer.h.5.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
260
+ size mismatch for transformer.h.5.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
261
+ size mismatch for transformer.h.5.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
262
+ size mismatch for transformer.h.5.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
263
+ size mismatch for transformer.h.5.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
264
+ size mismatch for transformer.h.5.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
265
+ size mismatch for transformer.h.5.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
266
+ size mismatch for transformer.h.5.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
267
+ size mismatch for transformer.h.5.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
268
+ size mismatch for transformer.h.5.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
269
+ size mismatch for transformer.h.5.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
270
+ size mismatch for transformer.ln_f.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
271
+ size mismatch for transformer.ln_f.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
272
+ 2024-11-16 23:40:15,287 - main - ERROR - Failed to initialize model manager
273
+ 2024-11-16 23:45:40,456 - main - INFO - Loading tokenizer...
274
+ 2024-11-16 23:45:41,738 - main - INFO - Loading model...
275
+ 2024-11-16 23:45:42,454 - main - WARNING - Missing keys: ['lm_head.weight']
276
+ 2024-11-16 23:45:42,455 - main - WARNING - Unexpected keys: ['lm_head.scale', 'lm_head.zero_point', 'lm_head._packed_params.dtype', 'lm_head._packed_params._packed_params']
277
+ 2024-11-16 23:45:42,459 - main - INFO - Model and tokenizer loaded successfully
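
The mismatches above are self-consistent: every 768/2304/3072-sized parameter in the freshly constructed GPT-2 meets a 384/1152/1536-sized tensor from the checkpoint, layers 6-11 are missing entirely, and the lm_head._packed_params.* entries are what PyTorch dynamic quantization leaves in place of a plain lm_head.weight. A quick, illustrative way to confirm a checkpoint's real architecture before wiring up the model (not part of the commit) is to list its keys and shapes:

import torch

# List every entry and its shape so the checkpoint's true architecture
# (embedding width, layer count, quantized entries) is visible at a glance.
state_dict = torch.load("./models/poeticagpt.pth", map_location="cpu")
for key, value in state_dict.items():
    shape = tuple(value.shape) if hasattr(value, "shape") else type(value).__name__
    print(f"{key}: {shape}")

# Layer count = highest transformer.h.<i> index + 1; width from the embeddings.
layers = {int(k.split(".")[2]) for k in state_dict if k.startswith("transformer.h.")}
print("layers:", max(layers) + 1, "| n_embd:", state_dict["transformer.wte.weight"].shape[1])
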
main.py CHANGED
@@ -5,13 +5,22 @@ import logging
 import sys
 from pydantic import BaseModel, Field
 import torch
-from transformers import GPT2Tokenizer, GPT2LMHeadModel
+from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config
 import json
 
 # Define base model directory
 BASE_MODEL_DIR = "./models/"
 MODEL_PATH = os.path.join(BASE_MODEL_DIR, "poeticagpt.pth")
-
+MODEL_CONFIG = GPT2Config(
+    n_positions=128,  # MAX_LENGTH from training
+    n_ctx=128,
+    n_embd=384,       # Same as training
+    n_layer=6,        # Same as training
+    n_head=6,         # Same as training
+    vocab_size=50257,
+    bos_token_id=50256,
+    eos_token_id=50256,
+)
 def setup_logging():
     logger = logging.getLogger(__name__)
     logger.setLevel(logging.DEBUG)
@@ -55,7 +64,6 @@ class ModelManager:
         """Initialize the model and tokenizer"""
         try:
             logger.info("Loading tokenizer...")
-            # Load the base GPT-2 tokenizer
             self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
             self.tokenizer.pad_token = self.tokenizer.eos_token
 
@@ -64,14 +72,19 @@ class ModelManager:
                 logger.error(f"Model file not found at {MODEL_PATH}")
                 return False
 
-            # Initialize a GPT2 model with default configuration
-            self.model = GPT2LMHeadModel.from_pretrained('gpt2')
+            # Initialize model with the same configuration as training
+            self.model = GPT2LMHeadModel(MODEL_CONFIG)
 
             # Load your trained weights
             state_dict = torch.load(MODEL_PATH, map_location='cpu')
 
             # Load the state dictionary into the model
-            self.model.load_state_dict(state_dict)
+            missing_keys, unexpected_keys = self.model.load_state_dict(state_dict, strict=False)
+
+            if missing_keys:
+                logger.warning(f"Missing keys: {missing_keys}")
+            if unexpected_keys:
+                logger.warning(f"Unexpected keys: {unexpected_keys}")
 
             # Force model to CPU and eval mode
             self.model.to('cpu')
@@ -85,6 +98,7 @@ class ModelManager:
             logger.exception("Detailed traceback:")
             return False
 
+
    def generate(self, request: GenerateRequest) -> Dict[str, Any]:
        """Generate poetry based on the request parameters"""
        if self.model is None or self.tokenizer is None:
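
One note on the warnings that remain after this change (visible at the end of the log): GPT2LMHeadModel ties lm_head.weight to transformer.wte.weight by default, so the missing lm_head.weight reported by the non-strict load is expected and harmless as long as weight tying stays enabled, and the unexpected lm_head._packed_params.* keys are simply the quantized head parameters the checkpoint carries but the float model has no slot for. An illustrative post-initialization check, not part of the commit and assuming manager is an initialized ModelManager instance:

# Confirm the LM head shares storage with the token embedding matrix; if it
# did not, generation would run with randomly initialized head weights.
model = manager.model  # hypothetical handle to the loaded GPT2LMHeadModel
assert model.lm_head.weight.data_ptr() == model.transformer.wte.weight.data_ptr(), \
    "lm_head.weight is not tied to transformer.wte.weight"
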