chargoddard
/

internlm2-7b-llama

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

internlm2-7b-llama / convert_weights.py

chargoddard's picture

Add script for weight conversion

7d774e5 verified 10 months ago

3.69 kB

	#!/usr/bin/env python3
	# 1/17/2024
	# Charles O. Goddard
	"""Convert internlm2 weights to Llama format."""

	import json
	import os
	import einops
	import tqdm
	from mergekit.io import LazyTensorLoader, TensorWriter
	from mergekit.common import ModelReference
	from transformers import LlamaTokenizer

	# Source checkpoint to convert and the directory the Llama-format copy is written to.
	MODEL_IN = "internlm/internlm2-20b"
	OUT_PATH = "./internlm2-20b-llama"

	# Resolve the source model and load its config; trust_remote_code=True is
	# passed so a config class shipped with the source repo can be loaded.
	model_ref = ModelReference.parse(MODEL_IN)
	cfg = model_ref.config(trust_remote_code=True)

	# Attention geometry used later to split the fused wqkv weight:
	#   num_key_value_groups - query heads per key/value head
	#   head_dim             - width of a single attention head
	num_key_value_groups = cfg.num_attention_heads // cfg.num_key_value_heads
	head_dim = cfg.hidden_size // cfg.num_attention_heads

	# Reader over the source tensors and writer for the converted output.
	loader = LazyTensorLoader(model_ref.tensor_index(), lazy_unpickle=True)
	writer = TensorWriter(OUT_PATH)

	# Substring rewrites from InternLM2 tensor names to Llama tensor names.
	# They are applied in insertion order to every tensor name that does not
	# contain "attention.wqkv" (those are split separately).
	SIMPLE_REPLACEMENTS = {
	    # feed-forward projections
	    "feed_forward.w1": "mlp.gate_proj",
	    "feed_forward.w2": "mlp.down_proj",
	    "feed_forward.w3": "mlp.up_proj",
	    # attention output projection
	    "attention.wo": "self_attn.o_proj",
	    # per-layer norms
	    "ffn_norm": "post_attention_layernorm",
	    "attention_norm": "input_layernorm",
	    # embeddings and output head
	    "tok_embeddings": "embed_tokens",
	    "output.weight": "lm_head.weight",
	}

	# Walk every tensor of the source checkpoint: fused wqkv weights are split
	# into separate q/k/v projections, everything else is only renamed.
	for tensor_name in tqdm.tqdm(loader.index.tensor_paths):
	    tensor = loader.get_tensor(tensor_name)

	    if "attention.wqkv" not in tensor_name:
	        # Plain rename; str.replace is a no-op when the pattern is absent.
	        out_name = tensor_name
	        for pattern, sub in SIMPLE_REPLACEMENTS.items():
	            out_name = out_name.replace(pattern, sub)
	        writer.save_tensor(out_name, tensor)
	        continue

	    # Fused weight layout (from the original author's shape notes): wqkv has
	    # (num_attention_heads + 2 * num_key_value_heads) * head_dim rows and
	    # hidden_size columns. The InternLM2 forward pass splits its output with
	    #   "b q (h gs d) -> b q h gs d", gs = 2 + num_key_value_groups, d = head_dim
	    # so the same grouping is applied here to the weight rows instead.
	    # Within each group `h`, the first num_key_value_groups slots are taken
	    # as queries, the second-to-last slot as key and the last slot as value.
	    qkv_vecs = einops.rearrange(
	        tensor,
	        "(h gs d) z -> h gs d z",
	        gs=2 + num_key_value_groups,
	        d=head_dim,
	    )
	    query_slots = qkv_vecs[:, :num_key_value_groups, ...]
	    key_slot = qkv_vecs[:, -2, ...]
	    value_slot = qkv_vecs[:, -1, ...]

	    # Flatten each selection back to a 2-D (rows, hidden_size) matrix.
	    q_proj = query_slots.reshape(-1, cfg.hidden_size).contiguous()
	    k_proj = key_slot.reshape(-1, cfg.hidden_size).contiguous()
	    v_proj = value_slot.reshape(-1, cfg.hidden_size).contiguous()
	    assert k_proj.shape == v_proj.shape

	    # Written in q, k, v order under the Llama projection names.
	    split_projections = (
	        ("self_attn.q_proj", q_proj),
	        ("self_attn.k_proj", k_proj),
	        ("self_attn.v_proj", v_proj),
	    )
	    for llama_name, projection in split_projections:
	        writer.save_tensor(
	            tensor_name.replace("attention.wqkv", llama_name),
	            projection,
	            clone=True,
	        )

	# Finish the output once every tensor has been handed to the writer.
	writer.finalize()

	# Write a Llama-flavoured config.json next to the converted weights.
	cfg_dict = json.loads(cfg.to_json_string())

	# auto_map refers to the source model's remote-code classes and does not
	# apply to a plain Llama checkpoint; tolerate configs that lack the key.
	cfg_dict.pop("auto_map", None)

	# Hugging Face configs store `architectures` as a list of class names;
	# a bare string breaks consumers that index or iterate the list.
	cfg_dict["architectures"] = ["LlamaForCausalLM"]
	cfg_dict["model_type"] = "llama"

	# A scaling factor of 1.0 is dropped as a no-op. rope_scaling may be
	# missing or null in the source config, so only inspect it when it is a dict.
	rope_scaling = cfg_dict.get("rope_scaling")
	if isinstance(rope_scaling, dict) and rope_scaling.get("factor") == 1.0:
	    del cfg_dict["rope_scaling"]

	with open(os.path.join(OUT_PATH, "config.json"), "w", encoding="utf-8") as fp:
	    json.dump(cfg_dict, fp, indent=2)

	# Export the tokenizer as a stock LlamaTokenizer (no remote code).
	# Author's notes on how InternLMTokenizer differs from LlamaTokenizer:
	#   1. it always calls clean_up_tokenization()
	#   2. it might prepend a space to some tokens that LlamaTokenizer doesn't,
	#      when they are the first token
	# (1) is addressed by enabling clean_up_tokenization_spaces below;
	# (2) is considered unimportant and left as-is.
	tok = LlamaTokenizer.from_pretrained(
	    MODEL_IN,
	    trust_remote_code=False,
	    legacy=True,
	)
	tok.clean_up_tokenization_spaces = True
	tok.save_pretrained(OUT_PATH)