Spaces • Running

abhisheksan committed • Commit f4f946e • Parent(s): 2442c76

Add model configuration and improve model initialization in ModelManager

Files changed:
- logs/poetry_generation.log (+170 -0)
- main.py (+20 -6)
logs/poetry_generation.log
CHANGED
@@ -105,3 +105,173 @@ OSError: Unable to load weights from pytorch checkpoint file for './models/pytor
 2024-11-16 23:35:18,815 - main - ERROR - Failed to initialize model manager
 2024-11-16 23:37:05,649 - main - INFO - Loading tokenizer...
 2024-11-16 23:37:06,372 - main - INFO - Loading model...
+2024-11-16 23:40:15,280 - main - ERROR - Error initializing model: Error(s) in loading state_dict for GPT2LMHeadModel:
+Missing key(s) in state_dict: "transformer.h.6.ln_1.weight", "transformer.h.6.ln_1.bias", "transformer.h.6.attn.c_attn.weight", "transformer.h.6.attn.c_attn.bias", "transformer.h.6.attn.c_proj.weight", "transformer.h.6.attn.c_proj.bias", "transformer.h.6.ln_2.weight", "transformer.h.6.ln_2.bias", "transformer.h.6.mlp.c_fc.weight", "transformer.h.6.mlp.c_fc.bias", "transformer.h.6.mlp.c_proj.weight", "transformer.h.6.mlp.c_proj.bias", "transformer.h.7.ln_1.weight", "transformer.h.7.ln_1.bias", "transformer.h.7.attn.c_attn.weight", "transformer.h.7.attn.c_attn.bias", "transformer.h.7.attn.c_proj.weight", "transformer.h.7.attn.c_proj.bias", "transformer.h.7.ln_2.weight", "transformer.h.7.ln_2.bias", "transformer.h.7.mlp.c_fc.weight", "transformer.h.7.mlp.c_fc.bias", "transformer.h.7.mlp.c_proj.weight", "transformer.h.7.mlp.c_proj.bias", "transformer.h.8.ln_1.weight", "transformer.h.8.ln_1.bias", "transformer.h.8.attn.c_attn.weight", "transformer.h.8.attn.c_attn.bias", "transformer.h.8.attn.c_proj.weight", "transformer.h.8.attn.c_proj.bias", "transformer.h.8.ln_2.weight", "transformer.h.8.ln_2.bias", "transformer.h.8.mlp.c_fc.weight", "transformer.h.8.mlp.c_fc.bias", "transformer.h.8.mlp.c_proj.weight", "transformer.h.8.mlp.c_proj.bias", "transformer.h.9.ln_1.weight", "transformer.h.9.ln_1.bias", "transformer.h.9.attn.c_attn.weight", "transformer.h.9.attn.c_attn.bias", "transformer.h.9.attn.c_proj.weight", "transformer.h.9.attn.c_proj.bias", "transformer.h.9.ln_2.weight", "transformer.h.9.ln_2.bias", "transformer.h.9.mlp.c_fc.weight", "transformer.h.9.mlp.c_fc.bias", "transformer.h.9.mlp.c_proj.weight", "transformer.h.9.mlp.c_proj.bias", "transformer.h.10.ln_1.weight", "transformer.h.10.ln_1.bias", "transformer.h.10.attn.c_attn.weight", "transformer.h.10.attn.c_attn.bias", "transformer.h.10.attn.c_proj.weight", "transformer.h.10.attn.c_proj.bias", "transformer.h.10.ln_2.weight", "transformer.h.10.ln_2.bias", "transformer.h.10.mlp.c_fc.weight", "transformer.h.10.mlp.c_fc.bias", "transformer.h.10.mlp.c_proj.weight", "transformer.h.10.mlp.c_proj.bias", "transformer.h.11.ln_1.weight", "transformer.h.11.ln_1.bias", "transformer.h.11.attn.c_attn.weight", "transformer.h.11.attn.c_attn.bias", "transformer.h.11.attn.c_proj.weight", "transformer.h.11.attn.c_proj.bias", "transformer.h.11.ln_2.weight", "transformer.h.11.ln_2.bias", "transformer.h.11.mlp.c_fc.weight", "transformer.h.11.mlp.c_fc.bias", "transformer.h.11.mlp.c_proj.weight", "transformer.h.11.mlp.c_proj.bias", "lm_head.weight".
+Unexpected key(s) in state_dict: "lm_head.scale", "lm_head.zero_point", "lm_head._packed_params.dtype", "lm_head._packed_params._packed_params".
+size mismatch for transformer.wte.weight: copying a param with shape torch.Size([50257, 384]) from checkpoint, the shape in current model is torch.Size([50257, 768]).
+size mismatch for transformer.wpe.weight: copying a param with shape torch.Size([128, 384]) from checkpoint, the shape in current model is torch.Size([1024, 768]).
+size mismatch for transformer.h.0.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.0.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.0.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.0.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.0.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.0.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.0.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.1.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.1.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.1.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.1.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.1.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.1.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.2.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.2.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.2.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.2.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.2.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.2.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.3.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.3.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.3.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.3.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.3.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.3.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.4.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.4.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.4.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.4.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.4.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.4.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.5.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.5.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.5.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.5.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.5.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.5.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.ln_f.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.ln_f.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+2024-11-16 23:40:15,283 - main - ERROR - Detailed traceback:
+Traceback (most recent call last):
+  File "E:\Self Work\My Projects\Poetica HuggingFace Server\poetica\main.py", line 74, in initialize
+    self.model.load_state_dict(state_dict)
+  File "e:\Self Work\My Projects\Poetica HuggingFace Server\.venv\Lib\site-packages\torch\nn\modules\module.py", line 2189, in load_state_dict
+    raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
+RuntimeError: Error(s) in loading state_dict for GPT2LMHeadModel:
+Missing key(s) in state_dict: "transformer.h.6.ln_1.weight", "transformer.h.6.ln_1.bias", "transformer.h.6.attn.c_attn.weight", "transformer.h.6.attn.c_attn.bias", "transformer.h.6.attn.c_proj.weight", "transformer.h.6.attn.c_proj.bias", "transformer.h.6.ln_2.weight", "transformer.h.6.ln_2.bias", "transformer.h.6.mlp.c_fc.weight", "transformer.h.6.mlp.c_fc.bias", "transformer.h.6.mlp.c_proj.weight", "transformer.h.6.mlp.c_proj.bias", "transformer.h.7.ln_1.weight", "transformer.h.7.ln_1.bias", "transformer.h.7.attn.c_attn.weight", "transformer.h.7.attn.c_attn.bias", "transformer.h.7.attn.c_proj.weight", "transformer.h.7.attn.c_proj.bias", "transformer.h.7.ln_2.weight", "transformer.h.7.ln_2.bias", "transformer.h.7.mlp.c_fc.weight", "transformer.h.7.mlp.c_fc.bias", "transformer.h.7.mlp.c_proj.weight", "transformer.h.7.mlp.c_proj.bias", "transformer.h.8.ln_1.weight", "transformer.h.8.ln_1.bias", "transformer.h.8.attn.c_attn.weight", "transformer.h.8.attn.c_attn.bias", "transformer.h.8.attn.c_proj.weight", "transformer.h.8.attn.c_proj.bias", "transformer.h.8.ln_2.weight", "transformer.h.8.ln_2.bias", "transformer.h.8.mlp.c_fc.weight", "transformer.h.8.mlp.c_fc.bias", "transformer.h.8.mlp.c_proj.weight", "transformer.h.8.mlp.c_proj.bias", "transformer.h.9.ln_1.weight", "transformer.h.9.ln_1.bias", "transformer.h.9.attn.c_attn.weight", "transformer.h.9.attn.c_attn.bias", "transformer.h.9.attn.c_proj.weight", "transformer.h.9.attn.c_proj.bias", "transformer.h.9.ln_2.weight", "transformer.h.9.ln_2.bias", "transformer.h.9.mlp.c_fc.weight", "transformer.h.9.mlp.c_fc.bias", "transformer.h.9.mlp.c_proj.weight", "transformer.h.9.mlp.c_proj.bias", "transformer.h.10.ln_1.weight", "transformer.h.10.ln_1.bias", "transformer.h.10.attn.c_attn.weight", "transformer.h.10.attn.c_attn.bias", "transformer.h.10.attn.c_proj.weight", "transformer.h.10.attn.c_proj.bias", "transformer.h.10.ln_2.weight", "transformer.h.10.ln_2.bias", "transformer.h.10.mlp.c_fc.weight", "transformer.h.10.mlp.c_fc.bias", "transformer.h.10.mlp.c_proj.weight", "transformer.h.10.mlp.c_proj.bias", "transformer.h.11.ln_1.weight", "transformer.h.11.ln_1.bias", "transformer.h.11.attn.c_attn.weight", "transformer.h.11.attn.c_attn.bias", "transformer.h.11.attn.c_proj.weight", "transformer.h.11.attn.c_proj.bias", "transformer.h.11.ln_2.weight", "transformer.h.11.ln_2.bias", "transformer.h.11.mlp.c_fc.weight", "transformer.h.11.mlp.c_fc.bias", "transformer.h.11.mlp.c_proj.weight", "transformer.h.11.mlp.c_proj.bias", "lm_head.weight".
+Unexpected key(s) in state_dict: "lm_head.scale", "lm_head.zero_point", "lm_head._packed_params.dtype", "lm_head._packed_params._packed_params".
+size mismatch for transformer.wte.weight: copying a param with shape torch.Size([50257, 384]) from checkpoint, the shape in current model is torch.Size([50257, 768]).
+size mismatch for transformer.wpe.weight: copying a param with shape torch.Size([128, 384]) from checkpoint, the shape in current model is torch.Size([1024, 768]).
+size mismatch for transformer.h.0.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.0.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.0.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.0.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.0.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.0.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.0.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.0.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.1.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.1.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.1.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.1.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.1.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.1.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.1.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.2.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.2.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.2.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.2.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.2.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.2.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.2.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.3.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.3.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.3.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.3.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.3.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.3.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.3.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.4.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.4.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.4.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.4.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.4.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.4.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.4.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.ln_1.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.ln_1.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.attn.c_attn.weight: copying a param with shape torch.Size([384, 1152]) from checkpoint, the shape in current model is torch.Size([768, 2304]).
+size mismatch for transformer.h.5.attn.c_attn.bias: copying a param with shape torch.Size([1152]) from checkpoint, the shape in current model is torch.Size([2304]).
+size mismatch for transformer.h.5.attn.c_proj.weight: copying a param with shape torch.Size([384, 384]) from checkpoint, the shape in current model is torch.Size([768, 768]).
+size mismatch for transformer.h.5.attn.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.ln_2.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.ln_2.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.h.5.mlp.c_fc.weight: copying a param with shape torch.Size([384, 1536]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
+size mismatch for transformer.h.5.mlp.c_fc.bias: copying a param with shape torch.Size([1536]) from checkpoint, the shape in current model is torch.Size([3072]).
+size mismatch for transformer.h.5.mlp.c_proj.weight: copying a param with shape torch.Size([1536, 384]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
+size mismatch for transformer.h.5.mlp.c_proj.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.ln_f.weight: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+size mismatch for transformer.ln_f.bias: copying a param with shape torch.Size([384]) from checkpoint, the shape in current model is torch.Size([768]).
+2024-11-16 23:40:15,287 - main - ERROR - Failed to initialize model manager
+2024-11-16 23:45:40,456 - main - INFO - Loading tokenizer...
+2024-11-16 23:45:41,738 - main - INFO - Loading model...
+2024-11-16 23:45:42,454 - main - WARNING - Missing keys: ['lm_head.weight']
+2024-11-16 23:45:42,455 - main - WARNING - Unexpected keys: ['lm_head.scale', 'lm_head.zero_point', 'lm_head._packed_params.dtype', 'lm_head._packed_params._packed_params']
+2024-11-16 23:45:42,459 - main - INFO - Model and tokenizer loaded successfully
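The log above tells a consistent story: every tensor in the checkpoint has hidden size 384 and only blocks h.0 through h.5 (a 6-layer model), while a default GPT2LMHeadModel expects 768 and 12 layers, and the lm_head.scale / lm_head._packed_params.* entries are leftovers of a dynamically quantized output head. The checkpoint's geometry can be read straight off its tensors before building any model; the snippet below is a minimal diagnostic sketch (the way n_layer is counted and the printed labels are illustrative, not part of this commit):

    import torch

    # Inspect the trained checkpoint without instantiating a model first.
    state_dict = torch.load("./models/poeticagpt.pth", map_location="cpu")

    # Token and position embeddings give vocab_size, n_embd and n_positions.
    print("wte:", tuple(state_dict["transformer.wte.weight"].shape))  # (50257, 384) -> vocab_size, n_embd
    print("wpe:", tuple(state_dict["transformer.wpe.weight"].shape))  # (128, 384)   -> n_positions, n_embd

    # Count distinct transformer blocks to recover n_layer.
    blocks = {key.split(".")[2] for key in state_dict if key.startswith("transformer.h.")}
    print("n_layer:", len(blocks))  # 6

    # Keys such as 'lm_head._packed_params._packed_params' indicate the head was
    # saved from a dynamically quantized nn.Linear rather than a plain float Linear.
    print([key for key in state_dict if key.startswith("lm_head")])

Those values (384-dimensional embeddings, 128 positions, 6 layers) are exactly what the new MODEL_CONFIG in main.py below encodes.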
main.py
CHANGED
@@ -5,13 +5,22 @@ import logging
 import sys
 from pydantic import BaseModel, Field
 import torch
-from transformers import GPT2Tokenizer, GPT2LMHeadModel
+from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config
 import json
 
 # Define base model directory
 BASE_MODEL_DIR = "./models/"
 MODEL_PATH = os.path.join(BASE_MODEL_DIR, "poeticagpt.pth")
-
+MODEL_CONFIG = GPT2Config(
+    n_positions=128,  # MAX_LENGTH from training
+    n_ctx=128,
+    n_embd=384,       # Same as training
+    n_layer=6,        # Same as training
+    n_head=6,         # Same as training
+    vocab_size=50257,
+    bos_token_id=50256,
+    eos_token_id=50256,
+)
 def setup_logging():
     logger = logging.getLogger(__name__)
     logger.setLevel(logging.DEBUG)
@@ -55,7 +64,6 @@ class ModelManager:
         """Initialize the model and tokenizer"""
         try:
             logger.info("Loading tokenizer...")
-            # Load the base GPT-2 tokenizer
             self.tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
             self.tokenizer.pad_token = self.tokenizer.eos_token
 
@@ -64,14 +72,19 @@ class ModelManager:
                 logger.error(f"Model file not found at {MODEL_PATH}")
                 return False
 
-            # Initialize
-            self.model = GPT2LMHeadModel
+            # Initialize model with the same configuration as training
+            self.model = GPT2LMHeadModel(MODEL_CONFIG)
 
             # Load your trained weights
             state_dict = torch.load(MODEL_PATH, map_location='cpu')
 
             # Load the state dictionary into the model
-            self.model.load_state_dict(state_dict)
+            missing_keys, unexpected_keys = self.model.load_state_dict(state_dict, strict=False)
+
+            if missing_keys:
+                logger.warning(f"Missing keys: {missing_keys}")
+            if unexpected_keys:
+                logger.warning(f"Unexpected keys: {unexpected_keys}")
 
             # Force model to CPU and eval mode
             self.model.to('cpu')
@@ -85,6 +98,7 @@ class ModelManager:
             logger.exception("Detailed traceback:")
             return False
 
+
     def generate(self, request: GenerateRequest) -> Dict[str, Any]:
         """Generate poetry based on the request parameters"""
        if self.model is None or self.tokenizer is None: