Upload MyLLaMa
configure_for_hf.py  +14 -0
CHANGED
@@ -41,6 +41,20 @@ class MyLLaMa(PreTrainedModel):
             n_chckpnt_segments=config.n_chckpnt_segments,
         )
 
+    def load_state_dict(self, state_dict, **kwargs):
+        for key in list(state_dict.keys()):
+            if "rmsnorm1.weight" in key:
+                new_key = key.replace("rmsnorm1.weight", "rmsnorm1.gamma")
+                state_dict[new_key] = state_dict.pop(key)
+            elif "rmsnorm2.weight" in key:
+                new_key = key.replace("rmsnorm2.weight", "rmsnorm2.gamma")
+                state_dict[new_key] = state_dict.pop(key)
+            elif "rmsnorm.weight" in key:
+                new_key = key.replace("rmsnorm.weight", "rmsnorm.gamma")
+                state_dict[new_key] = state_dict.pop(key)
+
+        super().load_state_dict(state_dict, **kwargs)
+
     def forward(self, tensor, labels=None):
         logits = self.model(tensor)["logits"]
         if labels is not None:
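For context: the model's custom RMSNorm modules appear to register their scale parameter as "gamma", while earlier checkpoints were saved with PyTorch's default ".weight" naming, so the override added in this commit remaps those keys before delegating to PreTrainedModel.load_state_dict. Below is a minimal usage sketch, not part of this commit; the import path, MyLLaMaConfig, the config values, and the checkpoint filename are assumptions for illustration.

import torch

# Assumed import path and config class; adjust to the actual repo layout.
from configure_for_hf import MyLLaMa, MyLLaMaConfig

config = MyLLaMaConfig(n_chckpnt_segments=4)
model = MyLLaMa(config)

# A checkpoint saved while the RMSNorm scale was still stored under ".weight".
# The overridden load_state_dict renames keys such as "...rmsnorm1.weight" to
# "...rmsnorm1.gamma" in place, then calls the parent implementation.
state_dict = torch.load("old_checkpoint.pth", map_location="cpu")
model.load_state_dict(state_dict)

Note that the override iterates over list(state_dict.keys()), i.e. a snapshot of the keys, so popping and re-inserting entries while looping is safe.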