PEFT/LoRA error when merging base and adapter
I tried this example:
from peft import LoraConfig, get_peft_model, prepare_model_for_int8_training

model = prepare_model_for_int8_training(model, use_gradient_checkpointing=gradient_checkpointing)

# The dimension used by the LoRA update matrices
LORA_R = 4
# Scaling factor
LORA_ALPHA = 16
LORA_DROPOUT = 0.05

# r and alpha together control the total number of trainable parameters,
# letting you trade off end performance against compute efficiency.
config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",  # specifies whether the bias parameters should be trained
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
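For a sense of the sizes involved: LoRA adds, per targeted Linear layer, a pair of matrices A (shape r x in_features) and B (shape out_features x r), and the effective update is (lora_alpha / r) * B @ A. With r=4 on a 2560 -> 7680 projection like the query_key_value layer shown further down, that is 4*2560 + 7680*4 = 40,960 trainable parameters per layer, versus 2560*7680 ≈ 19.7M for the full weight matrix.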
I train and push to the Hub successfully.
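(For context, the save/push step was essentially the following; the output directory name is just illustrative.)

# Saving a PeftModel writes only the adapter files (adapter_config.json, adapter_model.bin)
model.save_pretrained("dolly-v2-3b-lora")
# Upload the adapter to the Hub repo referenced as repo_name below
model.push_to_hub(repo_name)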
But when I try to use the adapter with the base model, I get an error:
from peft import PeftConfig
config = PeftConfig.from_pretrained(repo_name)
Out[19]: PeftConfig(peft_type='LORA', base_model_name_or_path='databricks/dolly-v2-3b', task_type='CAUSAL_LM', inference_mode=True)
from transformers import AutoModelForCausalLM
from peft import PeftModel
import torch
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)
# Load the LoRA model
inference_model = PeftModel.from_pretrained(model, repo_name) # <-- error here
inference_model.eval()
inference_model
Error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
File <command-3660940350576262>:12
5 model = AutoModelForCausalLM.from_pretrained(
6 config.base_model_name_or_path,
7 device_map="auto",
8 torch_dtype=torch.bfloat16,
9 trust_remote_code=True,
10 )
11 # Load the LoRA model
---> 12 inference_model = PeftModel.from_pretrained(model, repo_name)
13 inference_model.eval()
14 inference_model
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/peft/peft_model.py:181, in PeftModel.from_pretrained(cls, model, model_id, adapter_name, is_trainable, **kwargs)
179 else:
180 model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name)
--> 181 model.load_adapter(model_id, adapter_name, **kwargs)
182 return model
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/peft/peft_model.py:384, in PeftModel.load_adapter(self, model_id, adapter_name, is_trainable, **kwargs)
380 adapters_weights = torch.load(
381 filename, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu")
382 )
383 # load the weights into the model
--> 384 set_peft_model_state_dict(self, adapters_weights, adapter_name=adapter_name)
385 if (
386 (getattr(self, "hf_device_map", None) is not None)
387 and (len(set(self.hf_device_map.values()).intersection({"cpu", "disk"})) > 0)
388 and len(self.peft_config) == 1
389 ):
390 device_map = kwargs.get("device_map", "auto")
File /local_disk0/.ephemeral_nfs/cluster_libraries/python/lib/python3.9/site-packages/peft/utils/save_and_load.py:123, in set_peft_model_state_dict(model, peft_model_state_dict, adapter_name)
120 else:
121 raise NotImplementedError
--> 123 model.load_state_dict(peft_model_state_dict, strict=False)
124 if isinstance(config, PromptLearningConfig):
125 model.prompt_encoder[adapter_name].embedding.load_state_dict(
126 {"weight": peft_model_state_dict["prompt_embeddings"]}, strict=True
127 )
File /databricks/python/lib/python3.9/site-packages/torch/nn/modules/module.py:1671, in Module.load_state_dict(self, state_dict, strict)
1666 error_msgs.insert(
1667 0, 'Missing key(s) in state_dict: {}. '.format(
1668 ', '.join('"{}"'.format(k) for k in missing_keys)))
1670 if len(error_msgs) > 0:
-> 1671 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
1672 self.__class__.__name__, "\n\t".join(error_msgs)))
1673 return _IncompatibleKeys(missing_keys, unexpected_keys)
The following is then printed for each of layers 0 through 31 (layer 0 shown here):
RuntimeError: Error(s) in loading state_dict for PeftModelForCausalLM: size mismatch for base_model.model.gpt_neox.layers.0.attention.query_key_value.lora_B.default.weight: copying a param with shape torch.Size([0]) from checkpoint, the shape in current model is torch.Size([7680, 4]).
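The torch.Size([0]) shapes suggest the lora_B tensors in the uploaded checkpoint were saved empty. A quick way to confirm is to inspect the adapter file directly (rough sketch; assumes the default PEFT weights filename, adapter_model.bin):

# Download and inspect the pushed adapter weights
import torch
from huggingface_hub import hf_hub_download

adapter_path = hf_hub_download(repo_name, "adapter_model.bin")
state = torch.load(adapter_path, map_location="cpu")
for name, tensor in list(state.items())[:6]:
    print(name, tuple(tensor.shape))
# lora_B entries printing shape (0,) would match the size-mismatch error above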
I'm not sure if this is due to a misconfigured LoraConfig, so any pointers would be appreciated. There's no modules_to_save or target_modules in the referenced example, so I'm wondering whether one of those has since become a requirement for PEFT-tuning Dolly with LoRA (presuming the referenced author got it working).
Here's the architecture:
GPTNeoXForCausalLM(
  (gpt_neox): GPTNeoXModel(
    (embed_in): Embedding(50280, 2560)
    (layers): ModuleList(
      (0): GPTNeoXLayer(
        (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (post_attention_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (attention): GPTNeoXAttention(
          (rotary_emb): RotaryEmbedding()
          (query_key_value): Linear(in_features=2560, out_features=7680, bias=True)
          (dense): Linear(in_features=2560, out_features=2560, bias=True)
        )
        (mlp): GPTNeoXMLP(
          (dense_h_to_4h): Linear(in_features=2560, out_features=10240, bias=True)
          (dense_4h_to_h): Linear(in_features=10240, out_features=2560, bias=True)
          (act): GELUActivation()
        )
      )
      # GPTNeoXLayer repeated another 31 times
    )
    (final_layer_norm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
  )
  (embed_out): Linear(in_features=2560, out_features=50280, bias=False)
)
So I'm wondering whether some or all of those variables need to be set to ["layers"], ["embed_out"], or even both. (I'll try these and report back.)
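One way to see which leaf module names are valid target_modules candidates is to enumerate the base model's Linear layers (rough sketch):

# Collect the distinct leaf names of all Linear modules in the base model;
# any of these names can be passed in target_modules
import torch.nn as nn

linear_leaf_names = {
    name.split(".")[-1]
    for name, module in model.named_modules()
    if isinstance(module, nn.Linear)
}
print(linear_leaf_names)
# For dolly-v2-3b this should include: query_key_value, dense,
# dense_h_to_4h, dense_4h_to_h, embed_out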
I cross-posted to Github: https://github.com/huggingface/peft/issues/460
For future readers, there are two things to know about using LoRA here:

1. You can't use DeepSpeed ZeRO stage 3; set ZeRO to stage 2, or else the adapter_model.bin file gets saved with truncated (empty) weights.
2. target_modules for a GPT-NeoX architecture should be ["query_key_value"].
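Putting point 2 into code, here is a minimal sketch of the corrected config (hyperparameters carried over from the snippet above; adjust to taste):

from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],  # the fused Q/K/V projection in GPT-NeoX
)
model = get_peft_model(model, config)
model.print_trainable_parameters()  # sanity check: should report a small trainable fraction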