Some improvements
README.md (CHANGED)
Since LG has blocked even redistribution under the same license, the converted weights cannot simply be re-uploaded; the script in this README performs the port locally instead.

Relative to the previous revision, this commit loads EXAONE through `AutoModelForCausalLM.from_pretrained(..., torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)` instead of importing a local `modeling_exaone` module, loads the EXAONE tokenizer and saves it alongside the ported weights, carries `bos_token_id`, `eos_token_id`, and `pad_token_id` over into the `LlamaConfig`, moves every copied tensor onto the Llama model's device, wraps the layer loop in `tqdm`, and only flushes the CUDA cache when `torch.cuda.is_available()`.
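The renames in `copy_layer_weights` are a pure relabeling: the direct tensor-for-tensor copy only makes sense if both architectures compute the same operations under different parameter names, which is what the script assumes. As a reference before the script, here is a minimal sketch of the Llama-style SwiGLU MLP with the EXAONE-to-Llama name correspondence in comments; this is an illustration under that assumption, not code from the commit:

```python
import torch
import torch.nn.functional as F

def swiglu_mlp(x, gate_w, up_w, down_w):
    """Llama-style SwiGLU MLP: down(silu(gate(x)) * up(x)).

    EXAONE stores the same three matrices under different names:
        mlp.c_fc_0 -> mlp.gate_proj
        mlp.c_fc_1 -> mlp.up_proj
        mlp.c_proj -> mlp.down_proj
    so the port is a rename, with no numerical transformation.
    """
    return F.linear(F.silu(F.linear(x, gate_w)) * F.linear(x, up_w), down_w)

# Toy shapes just to show it runs: hidden=8, intermediate=16.
x = torch.randn(2, 8)
gate_w, up_w = torch.randn(16, 8), torch.randn(16, 8)
down_w = torch.randn(8, 16)
print(swiglu_mlp(x, gate_w, up_w, down_w).shape)  # torch.Size([2, 8])
```

The attention block is renamed the same way (`attn.attention.out_proj` becomes `self_attn.o_proj`, the q/k/v projections keep their names).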
The script as of this commit:

```python
import torch
import gc

from transformers import LlamaConfig, LlamaForCausalLM, AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm


def unload_model(model):
    """Clear memory by deleting a model and calling the garbage collector."""
    del model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

def create_llama_config(exaone_config):
    """Create and return a Llama configuration based on EXAONE config."""
    return LlamaConfig(
        vocab_size=exaone_config.vocab_size,
        hidden_size=exaone_config.hidden_size,
        # ... (additional architecture fields not shown in this diff hunk)
        rms_norm_eps=exaone_config.layer_norm_epsilon,
        num_key_value_heads=exaone_config.num_key_value_heads,
        rope_theta=exaone_config.rope_theta,
        bos_token_id=exaone_config.bos_token_id,
        eos_token_id=exaone_config.eos_token_id,
        pad_token_id=exaone_config.pad_token_id,
        attention_bias=False,
    )

def copy_embedding_weights(llama_model, exaone_model):
    """Copy embedding weights from EXAONE to Llama model."""
    llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data.to(llama_model.device)

def copy_layer_weights(llama_layer, exaone_layer, device):
    """Copy weights for a single layer from EXAONE to Llama model."""
    # Self-attention
    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(device)
    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(device)
    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(device)
    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(device)
    # MLP
    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(device)
    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(device)
    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(device)
    # Layer Norms
    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(device)
    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(device)

def copy_final_weights(llama_model, exaone_model):
    """Copy final layer norm and LM head weights from EXAONE to Llama model."""
    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(llama_model.device)
    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(llama_model.device)

def port_exaone_to_llama(exaone_model_path, llama_model_path):
    print("Loading EXAONE model and tokenizer...")
    exaone_model = AutoModelForCausalLM.from_pretrained(exaone_model_path, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)
    exaone_tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
    exaone_config = exaone_model.config

    print("Creating Llama configuration...")
    llama_config = create_llama_config(exaone_config)

    print("Initializing Llama model...")
    llama_model = LlamaForCausalLM(llama_config)
    llama_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    print("Copying weights...")
    copy_embedding_weights(llama_model, exaone_model)

    for i in tqdm(range(exaone_config.num_layers), desc="Copying layers"):
        copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i], llama_model.device)

    copy_final_weights(llama_model, exaone_model)

    print("Unloading EXAONE model to free memory...")
    unload_model(exaone_model)

    print(f"Saving ported Llama model and tokenizer to {llama_model_path}")
    llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")
    exaone_tokenizer.save_pretrained(llama_model_path)

    print("Unloading Llama model...")
    unload_model(llama_model)

    print(f"EXAONE model successfully ported to Llama format and saved at {llama_model_path}")

if __name__ == "__main__":
    exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
```
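The diff view cuts off inside the `__main__` block, so the output path is not visible here. Once the port has run, a quick sanity check (not part of the commit) is to reload the saved directory with the stock Llama classes and generate a few tokens; `llama_model_path` below is a placeholder for whatever directory was passed to `port_exaone_to_llama`:

```python
import torch
from transformers import AutoTokenizer, LlamaForCausalLM

llama_model_path = "./exaone-llamafied"  # placeholder -- the directory the script saved to

# The point of the port: the checkpoint now loads without trust_remote_code.
tokenizer = AutoTokenizer.from_pretrained(llama_model_path)
model = LlamaForCausalLM.from_pretrained(llama_model_path, torch_dtype=torch.bfloat16, device_map="auto")

inputs = tokenizer("The capital of France is", return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=20, do_sample=False)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```

If the copied weights are correct, greedy decoding here should match the original EXAONE checkpoint token for token.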