chargoddard committed
Commit 7d774e5 • 1 Parent(s): e3c365a

Add script for weight conversion

Files changed: convert_weights.py (+100, -0)
convert_weights.py
ADDED
@@ -0,0 +1,100 @@
#!/usr/bin/env python3
# 1/17/2024
# Charles O. Goddard
"""Convert internlm2 weights to Llama format."""

import json
import os
import einops
import tqdm
from mergekit.io import LazyTensorLoader, TensorWriter
from mergekit.common import ModelReference
from transformers import LlamaTokenizer

MODEL_IN = "internlm/internlm2-20b"
OUT_PATH = "./internlm2-20b-llama"

model_ref = ModelReference.parse(MODEL_IN)
cfg = model_ref.config(trust_remote_code=True)
head_dim = cfg.hidden_size // cfg.num_attention_heads
num_key_value_groups = cfg.num_attention_heads // cfg.num_key_value_heads
loader = LazyTensorLoader(model_ref.tensor_index(), lazy_unpickle=True)
writer = TensorWriter(OUT_PATH)

SIMPLE_REPLACEMENTS = {
    "feed_forward.w1": "mlp.gate_proj",
    "feed_forward.w2": "mlp.down_proj",
    "feed_forward.w3": "mlp.up_proj",
    "attention.wo": "self_attn.o_proj",
    "ffn_norm": "post_attention_layernorm",
    "attention_norm": "input_layernorm",
    "tok_embeddings": "embed_tokens",
    "output.weight": "lm_head.weight",
}
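# The loop below applies these as substring renames: any tensor whose name contains
# a key on the left is written out under the corresponding Llama-style name on the
# right (e.g. a "...feed_forward.w1.weight" key becomes "...mlp.gate_proj.weight").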

for tensor_name in tqdm.tqdm(loader.index.tensor_paths):
    tensor = loader.get_tensor(tensor_name)
    if "attention.wqkv" in tensor_name:
        # make me think about tensor shapes will you >:(

        # ((cfg.num_attention_heads + 2 * cfg.num_key_value_heads) * head_dim, cfg.hidden_size) x (batch_sz, sq_len, cfg.hidden_size)
        # -> (batch_sz, sq_len, (cfg.num_attention_heads + 2 * cfg.num_key_value_heads) * head_dim)
        # qkv_states = rearrange(
        #     qkv_states,
        #     "b q (h gs d) -> b q h gs d",
        #     gs=2 + self.num_key_value_groups,
        #     d=self.head_dim,
        # )
        # -> (batch_sz, sq_len, h, 2 + self.num_key_value_groups, head_dim)
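        # The packed wqkv weight stores, for each of the cfg.num_key_value_heads KV
        # heads, its num_key_value_groups query heads followed by one key head and
        # one value head. The rearrange below exposes that grouping as
        # (h, gs, d, hidden_size), so slicing along gs and flattening recovers
        # Llama-shaped projections: q_proj of shape
        # (num_attention_heads * head_dim, hidden_size), k_proj and v_proj of shape
        # (num_key_value_heads * head_dim, hidden_size).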
        qkv_vecs = einops.rearrange(
            tensor, "(h gs d) z -> h gs d z", gs=2 + num_key_value_groups, d=head_dim
        )
        q_proj = (
            qkv_vecs[:, :num_key_value_groups, ...]
            .reshape(-1, cfg.hidden_size)
            .contiguous()
        )
        k_proj = qkv_vecs[:, -2, ...].reshape(-1, cfg.hidden_size).contiguous()
        v_proj = qkv_vecs[:, -1, ...].reshape(-1, cfg.hidden_size).contiguous()
        assert k_proj.shape == v_proj.shape

        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.q_proj"),
            q_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.k_proj"),
            k_proj,
            clone=True,
        )
        writer.save_tensor(
            tensor_name.replace("attention.wqkv", "self_attn.v_proj"),
            v_proj,
            clone=True,
        )
        continue

    out_name = tensor_name
    for pattern, sub in SIMPLE_REPLACEMENTS.items():
        if pattern in out_name:
            out_name = out_name.replace(pattern, sub)
    writer.save_tensor(out_name, tensor)
writer.finalize()

cfg_dict = json.loads(cfg.to_json_string())
del cfg_dict["auto_map"]
cfg_dict["architectures"] = ["LlamaForCausalLM"]
cfg_dict["model_type"] = "llama"
if "rope_scaling" in cfg_dict and cfg_dict["rope_scaling"]["factor"] == 1.0:
    del cfg_dict["rope_scaling"]
with open(os.path.join(OUT_PATH, "config.json"), "w", encoding="utf-8") as fp:
    json.dump(cfg_dict, fp, indent=2)

# InternLMTokenizer differences:
# 1. clean_up_tokenization() hardcoded to always be called
# 2. might prepend a space to some tokens that LlamaTokenizer doesn't if they're the first token
# 1 is easy to fix, 2... is not important
tok = LlamaTokenizer.from_pretrained(MODEL_IN, trust_remote_code=False, legacy=True)
tok.clean_up_tokenization_spaces = True
tok.save_pretrained(OUT_PATH)
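
One way to sanity-check the converted checkpoint is to load it with stock transformers classes and compare its logits against the original trust_remote_code model on a short prompt. This is a minimal sketch, assuming both models fit in memory and reusing the paths from the script above; the prompt and dtype are arbitrary.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("./internlm2-20b-llama")
inputs = tok("The weather today is", return_tensors="pt")

# Converted checkpoint should load as a plain Llama model, no remote code needed.
converted = AutoModelForCausalLM.from_pretrained(
    "./internlm2-20b-llama", torch_dtype=torch.float16
)
# Original checkpoint still requires the InternLM2 modeling code.
original = AutoModelForCausalLM.from_pretrained(
    "internlm/internlm2-20b", torch_dtype=torch.float16, trust_remote_code=True
)

with torch.no_grad():
    a = converted(**inputs).logits
    b = original(**inputs).logits

# Logits should agree up to fp16 rounding if the weight mapping is correct.
print("max abs logit difference:", (a - b).abs().max().item())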