Some improvements (#1)
Browse files- Some improvements (0b14c9ff6f246f6341b1c44ea44a9ecd294addbe)
Co-authored-by: Jisoo Kim <kuotient@users.noreply.huggingface.co>
README.md
CHANGED
@@ -9,22 +9,22 @@ LG에서 동일 라이센스 재배포조차 막아버린 관계로 모델을
|
|
9 |
|
10 |
```python
|
11 |
import torch
|
12 |
-
from transformers import LlamaConfig, LlamaForCausalLM
|
13 |
-
from modeling_exaone import ExaoneForCausalLM
|
14 |
import gc
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
|
20 |
def unload_model(model):
|
21 |
"""Clear memory by deleting a model and calling the garbage collector."""
|
22 |
del model
|
23 |
gc.collect()
|
24 |
-
torch.cuda.
|
|
|
|
|
25 |
|
26 |
def create_llama_config(exaone_config):
|
27 |
-
"""Create and return a
|
28 |
return LlamaConfig(
|
29 |
vocab_size=exaone_config.vocab_size,
|
30 |
hidden_size=exaone_config.hidden_size,
|
@@ -35,65 +35,68 @@ def create_llama_config(exaone_config):
|
|
35 |
rms_norm_eps=exaone_config.layer_norm_epsilon,
|
36 |
num_key_value_heads=exaone_config.num_key_value_heads,
|
37 |
rope_theta=exaone_config.rope_theta,
|
|
|
|
|
|
|
38 |
attention_bias=False,
|
39 |
)
|
40 |
|
41 |
def copy_embedding_weights(llama_model, exaone_model):
|
42 |
-
"""Copy embedding weights from EXAONE to
|
43 |
-
llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data
|
44 |
|
45 |
-
def copy_layer_weights(llama_layer, exaone_layer):
|
46 |
-
"""Copy weights for a single layer from EXAONE to
|
47 |
# Self-attention
|
48 |
-
llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data
|
49 |
-
llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data
|
50 |
-
llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data
|
51 |
-
llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data
|
52 |
# MLP
|
53 |
-
llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data
|
54 |
-
llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data
|
55 |
-
llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data
|
56 |
# Layer Norms
|
57 |
-
llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data
|
58 |
-
llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data
|
59 |
|
60 |
def copy_final_weights(llama_model, exaone_model):
|
61 |
-
"""Copy final layer norm and LM head weights from EXAONE to
|
62 |
-
llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data
|
63 |
-
llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data
|
64 |
|
65 |
def port_exaone_to_llama(exaone_model_path, llama_model_path):
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
exaone_model = load_model(exaone_model_path, ExaoneForCausalLM).to(device)
|
70 |
exaone_config = exaone_model.config
|
71 |
|
72 |
-
print("Creating
|
73 |
llama_config = create_llama_config(exaone_config)
|
74 |
|
75 |
-
print("Initializing
|
76 |
-
llama_model = LlamaForCausalLM(llama_config)
|
|
|
77 |
|
78 |
print("Copying weights...")
|
79 |
copy_embedding_weights(llama_model, exaone_model)
|
80 |
|
81 |
-
for i in range(exaone_config.num_layers):
|
82 |
-
|
83 |
-
copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i])
|
84 |
|
85 |
copy_final_weights(llama_model, exaone_model)
|
86 |
|
87 |
print("Unloading EXAONE model to free memory...")
|
88 |
unload_model(exaone_model)
|
89 |
|
90 |
-
print(f"Saving ported
|
91 |
llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")
|
|
|
92 |
|
93 |
-
print("Unloading
|
94 |
unload_model(llama_model)
|
95 |
|
96 |
-
print(f"EXAONE model successfully ported to
|
97 |
|
98 |
if __name__ == "__main__":
|
99 |
exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
|
|
|
9 |
|
10 |
```python
|
11 |
import torch
|
|
|
|
|
12 |
import gc
|
13 |
|
14 |
+
from transformers import LlamaConfig, LlamaForCausalLM, AutoModelForCausalLM, AutoTokenizer
|
15 |
+
from tqdm import tqdm
|
16 |
+
|
17 |
|
18 |
def unload_model(model):
    """Run a garbage-collection pass and flush the CUDA cache for a discarded model.

    Args:
        model: The model the caller is finished with.

    Note:
        ``del model`` only removes this function's own binding to the object.
        The model can be reclaimed only once the caller has dropped its own
        references to it as well.
    """
    del model
    gc.collect()
    # Release unoccupied cached CUDA memory; skipped entirely on CPU-only hosts.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
|
25 |
|
26 |
def create_llama_config(exaone_config):
|
27 |
+
"""Create and return a Llama configuration based on EXAONE config."""
|
28 |
return LlamaConfig(
|
29 |
vocab_size=exaone_config.vocab_size,
|
30 |
hidden_size=exaone_config.hidden_size,
|
|
|
35 |
rms_norm_eps=exaone_config.layer_norm_epsilon,
|
36 |
num_key_value_heads=exaone_config.num_key_value_heads,
|
37 |
rope_theta=exaone_config.rope_theta,
|
38 |
+
bos_token_id=exaone_config.bos_token_id,
|
39 |
+
eos_token_id=exaone_config.eos_token_id,
|
40 |
+
pad_token_id=exaone_config.pad_token_id,
|
41 |
attention_bias=False,
|
42 |
)
|
43 |
|
44 |
def copy_embedding_weights(llama_model, exaone_model):
    """Copy embedding weights from EXAONE to Llama model.

    Args:
        llama_model: Destination model. ``model.embed_tokens.weight`` is
            overwritten and ``device`` selects where the tensor is placed.
        exaone_model: Source model. ``transformer.wte.weight`` is read.

    Raises:
        ValueError: If the source and destination embedding shapes differ.
    """
    source = exaone_model.transformer.wte.weight.data
    target = llama_model.model.embed_tokens.weight
    # Rebinding ``.data`` accepts any shape without complaint, so check explicitly
    # instead of producing a model whose weights disagree with its config.
    if source.shape != target.shape:
        raise ValueError(
            "Embedding shape mismatch: "
            f"EXAONE {tuple(source.shape)} vs Llama {tuple(target.shape)}"
        )
    # ``.data`` is rebound (not copied into), so the parameter takes on the
    # source tensor's dtype.
    target.data = source.to(llama_model.device)
|
47 |
|
48 |
+
def copy_layer_weights(llama_layer, exaone_layer, device=None):
    """Copy weights for a single layer from EXAONE to Llama model.

    Args:
        llama_layer: Destination decoder layer; the ``weight`` of its attention
            projections, MLP projections and two layer norms is overwritten.
        exaone_layer: Source decoder layer the weights are read from.
        device: Device the copied tensors are placed on. When ``None``, each
            tensor is placed on the device of the weight it replaces.

    Raises:
        ValueError: If any source weight's shape differs from its destination.
            All shapes are checked before anything is written, so a failed
            call leaves ``llama_layer`` untouched.
    """
    llama_attn = llama_layer.self_attn
    exaone_attn = exaone_layer.attn.attention
    llama_mlp = llama_layer.mlp
    exaone_mlp = exaone_layer.mlp

    # (label for error messages, destination module, source module)
    weight_pairs = (
        # Self-attention
        ("self_attn.q_proj", llama_attn.q_proj, exaone_attn.q_proj),
        ("self_attn.k_proj", llama_attn.k_proj, exaone_attn.k_proj),
        ("self_attn.v_proj", llama_attn.v_proj, exaone_attn.v_proj),
        ("self_attn.o_proj", llama_attn.o_proj, exaone_attn.out_proj),
        # MLP
        ("mlp.gate_proj", llama_mlp.gate_proj, exaone_mlp.c_fc_0),
        ("mlp.up_proj", llama_mlp.up_proj, exaone_mlp.c_fc_1),
        ("mlp.down_proj", llama_mlp.down_proj, exaone_mlp.c_proj),
        # Layer Norms
        ("input_layernorm", llama_layer.input_layernorm, exaone_layer.ln_1),
        ("post_attention_layernorm", llama_layer.post_attention_layernorm, exaone_layer.ln_2),
    )

    # Rebinding ``.data`` accepts any shape, so validate everything up front.
    for label, target_module, source_module in weight_pairs:
        source_shape = tuple(source_module.weight.shape)
        target_shape = tuple(target_module.weight.shape)
        if source_shape != target_shape:
            raise ValueError(
                f"Shape mismatch for {label}: EXAONE {source_shape} vs Llama {target_shape}"
            )

    for _, target_module, source_module in weight_pairs:
        target = target_module.weight
        destination = target.device if device is None else device
        target.data = source_module.weight.data.to(destination)
|
62 |
|
63 |
def copy_final_weights(llama_model, exaone_model):
    """Copy final layer norm and LM head weights from EXAONE to Llama model.

    Args:
        llama_model: Destination model. ``model.norm.weight`` and
            ``lm_head.weight`` are overwritten; ``device`` selects placement.
        exaone_model: Source model. ``transformer.ln_f.weight`` and
            ``lm_head.weight`` are read.

    Raises:
        ValueError: If either source weight's shape differs from its
            destination. Both are checked before anything is written.
    """
    weight_pairs = (
        ("final layer norm", llama_model.model.norm, exaone_model.transformer.ln_f),
        ("lm_head", llama_model.lm_head, exaone_model.lm_head),
    )

    # Rebinding ``.data`` accepts any shape, so validate both tensors first.
    for label, target_module, source_module in weight_pairs:
        source_shape = tuple(source_module.weight.shape)
        target_shape = tuple(target_module.weight.shape)
        if source_shape != target_shape:
            raise ValueError(
                f"Shape mismatch for {label}: EXAONE {source_shape} vs Llama {target_shape}"
            )

    for _, target_module, source_module in weight_pairs:
        target_module.weight.data = source_module.weight.data.to(llama_model.device)
|
67 |
|
68 |
def port_exaone_to_llama(exaone_model_path, llama_model_path):
    """Convert an EXAONE checkpoint into a Llama-format checkpoint on disk.

    Loads the EXAONE model and tokenizer, builds a Llama model from the derived
    configuration, copies the embedding, per-layer and final weights across,
    then saves the Llama model and the tokenizer to ``llama_model_path``.

    Args:
        exaone_model_path: Path or hub identifier passed to ``from_pretrained``
            for both the EXAONE model and its tokenizer.
        llama_model_path: Directory the converted model and tokenizer are
            written to.
    """
    print("Loading EXAONE model and tokenizer...")
    # NOTE(security): trust_remote_code=True runs Python shipped with the
    # checkpoint repository; only use it with a source you trust.
    exaone_model = AutoModelForCausalLM.from_pretrained(
        exaone_model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    exaone_tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
    exaone_config = exaone_model.config

    print("Creating Llama configuration...")
    llama_config = create_llama_config(exaone_config)

    print("Initializing Llama model...")
    llama_model = LlamaForCausalLM(llama_config)
    llama_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    print("Copying weights...")
    copy_embedding_weights(llama_model, exaone_model)

    # The destination device does not change during the loop; look it up once.
    target_device = llama_model.device
    for i in tqdm(range(exaone_config.num_layers), desc="Copying layers"):
        copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i], target_device)

    copy_final_weights(llama_model, exaone_model)

    print("Unloading EXAONE model to free memory...")
    # Passing the model to a helper cannot release it while this frame still
    # holds a reference, so drop the local name first and then collect.
    del exaone_model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print(f"Saving ported Llama model and tokenizer to {llama_model_path}")
    llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")
    exaone_tokenizer.save_pretrained(llama_model_path)

    print("Unloading Llama model...")
    del llama_model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print(f"EXAONE model successfully ported to Llama format and saved at {llama_model_path}")
|
100 |
|
101 |
if __name__ == "__main__":
|
102 |
exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
|