maywell kuotient committed on
Commit
839e7cc
·
verified ·
1 Parent(s): 0a64dcc

Some improvements (#1)

Browse files

- Some improvements (0b14c9ff6f246f6341b1c44ea44a9ecd294addbe)


Co-authored-by: Jisoo Kim <kuotient@users.noreply.huggingface.co>

Files changed (1) hide show
  1. README.md +39 -36
README.md CHANGED
@@ -9,22 +9,22 @@ LG에서 동일 라이센스 재배포조차 막아버린 관계로 모델을
9
 
10
  ```python
11
  import torch
12
- from transformers import LlamaConfig, LlamaForCausalLM
13
- from modeling_exaone import ExaoneForCausalLM
14
  import gc
15
 
16
- def load_model(model_path, model_class, torch_dtype=torch.bfloat16):
17
- """Load and return a model given its path and class."""
18
- return model_class.from_pretrained(model_path, torch_dtype=torch_dtype, device_map="auto")
19
 
20
  def unload_model(model):
21
  """Clear memory by deleting a model and calling the garbage collector."""
22
  del model
23
  gc.collect()
24
- torch.cuda.empty_cache()
 
 
25
 
26
  def create_llama_config(exaone_config):
27
- """Create and return a LLaMA configuration based on EXAONE config."""
28
  return LlamaConfig(
29
  vocab_size=exaone_config.vocab_size,
30
  hidden_size=exaone_config.hidden_size,
@@ -35,65 +35,68 @@ def create_llama_config(exaone_config):
35
  rms_norm_eps=exaone_config.layer_norm_epsilon,
36
  num_key_value_heads=exaone_config.num_key_value_heads,
37
  rope_theta=exaone_config.rope_theta,
 
 
 
38
  attention_bias=False,
39
  )
40
 
41
  def copy_embedding_weights(llama_model, exaone_model):
42
- """Copy embedding weights from EXAONE to LLaMA model."""
43
- llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data
44
 
45
- def copy_layer_weights(llama_layer, exaone_layer):
46
- """Copy weights for a single layer from EXAONE to LLaMA model."""
47
  # Self-attention
48
- llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data
49
- llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data
50
- llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data
51
- llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data
52
  # MLP
53
- llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data
54
- llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data
55
- llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data
56
  # Layer Norms
57
- llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data
58
- llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data
59
 
60
  def copy_final_weights(llama_model, exaone_model):
61
- """Copy final layer norm and LM head weights from EXAONE to LLaMA model."""
62
- llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data
63
- llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data
64
 
65
  def port_exaone_to_llama(exaone_model_path, llama_model_path):
66
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
67
-
68
- print("Loading EXAONE model...")
69
- exaone_model = load_model(exaone_model_path, ExaoneForCausalLM).to(device)
70
  exaone_config = exaone_model.config
71
 
72
- print("Creating LLaMA configuration...")
73
  llama_config = create_llama_config(exaone_config)
74
 
75
- print("Initializing LLaMA model...")
76
- llama_model = LlamaForCausalLM(llama_config).to(device)
 
77
 
78
  print("Copying weights...")
79
  copy_embedding_weights(llama_model, exaone_model)
80
 
81
- for i in range(exaone_config.num_layers):
82
- print(f"Copying weights for layer {i+1}/{exaone_config.num_layers}")
83
- copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i])
84
 
85
  copy_final_weights(llama_model, exaone_model)
86
 
87
  print("Unloading EXAONE model to free memory...")
88
  unload_model(exaone_model)
89
 
90
- print(f"Saving ported LLaMA model to {llama_model_path}")
91
  llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")
 
92
 
93
- print("Unloading LLaMA model...")
94
  unload_model(llama_model)
95
 
96
- print(f"EXAONE model successfully ported to LLaMA format and saved at {llama_model_path}")
97
 
98
  if __name__ == "__main__":
99
  exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
 
9
 
10
  ```python
11
  import torch
 
 
12
  import gc
13
 
14
+ from transformers import LlamaConfig, LlamaForCausalLM, AutoModelForCausalLM, AutoTokenizer
15
+ from tqdm import tqdm
16
+
17
 
18
  def unload_model(model):
19
  """Clear memory by deleting a model and calling the garbage collector."""
20
  del model
21
  gc.collect()
22
+ # if torch.cuda.is_available():
23
+ if torch.cuda.is_available():
24
+ torch.cuda.empty_cache()
25
 
26
  def create_llama_config(exaone_config):
27
+ """Create and return a Llama configuration based on EXAONE config."""
28
  return LlamaConfig(
29
  vocab_size=exaone_config.vocab_size,
30
  hidden_size=exaone_config.hidden_size,
 
35
  rms_norm_eps=exaone_config.layer_norm_epsilon,
36
  num_key_value_heads=exaone_config.num_key_value_heads,
37
  rope_theta=exaone_config.rope_theta,
38
+ bos_token_id=exaone_config.bos_token_id,
39
+ eos_token_id=exaone_config.eos_token_id,
40
+ pad_token_id=exaone_config.pad_token_id,
41
  attention_bias=False,
42
  )
43
 
44
  def copy_embedding_weights(llama_model, exaone_model):
45
+ """Copy embedding weights from EXAONE to Llama model."""
46
+ llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data.to(llama_model.device)
47
 
48
+ def copy_layer_weights(llama_layer, exaone_layer, device):
49
+ """Copy weights for a single layer from EXAONE to Llama model."""
50
  # Self-attention
51
+ llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(device)
52
+ llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(device)
53
+ llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(device)
54
+ llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(device)
55
  # MLP
56
+ llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(device)
57
+ llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(device)
58
+ llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(device)
59
  # Layer Norms
60
+ llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(device)
61
+ llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(device)
62
 
63
  def copy_final_weights(llama_model, exaone_model):
64
+ """Copy final layer norm and LM head weights from EXAONE to Llama model."""
65
+ llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(llama_model.device)
66
+ llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(llama_model.device)
67
 
68
  def port_exaone_to_llama(exaone_model_path, llama_model_path):
69
+ print("Loading EXAONE model and tokenizer...")
70
+ exaone_model = AutoModelForCausalLM.from_pretrained(exaone_model_path, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)
71
+ exaone_tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
 
72
  exaone_config = exaone_model.config
73
 
74
+ print("Creating Llama configuration...")
75
  llama_config = create_llama_config(exaone_config)
76
 
77
+ print("Initializing Llama model...")
78
+ llama_model = LlamaForCausalLM(llama_config)
79
+ llama_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
80
 
81
  print("Copying weights...")
82
  copy_embedding_weights(llama_model, exaone_model)
83
 
84
+ for i in tqdm(range(exaone_config.num_layers), desc="Copying layers"):
85
+ copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i], llama_model.device)
 
86
 
87
  copy_final_weights(llama_model, exaone_model)
88
 
89
  print("Unloading EXAONE model to free memory...")
90
  unload_model(exaone_model)
91
 
92
+ print(f"Saving ported Llama model and tokenizer to {llama_model_path}")
93
  llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")
94
+ exaone_tokenizer.save_pretrained(llama_model_path)
95
 
96
+ print("Unloading Llama model...")
97
  unload_model(llama_model)
98
 
99
+ print(f"EXAONE model successfully ported to Llama format and saved at {llama_model_path}")
100
 
101
  if __name__ == "__main__":
102
  exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"