trainer unable to train with RuntimeError: masked_scatter_: expected self and source to have same dtypes but got Half and Float
#8 · by chongdashu · opened
I followed everything in the blog, with the changes here, running on Lightning Studio with an L4 GPU, but I hit this error right at the end, when training starts:
--------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Cell In[11], line 1
----> 1 trainer.train()
File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/trainer.py:1885, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1883 hf_hub_utils.enable_progress_bars()
1884 else:
-> 1885 return inner_training_loop(
1886 args=args,
1887 resume_from_checkpoint=resume_from_checkpoint,
1888 trial=trial,
1889 ignore_keys_for_eval=ignore_keys_for_eval,
1890 )
File /home/zeus/miniconda3/envs/cloudspace/lib/python3.10/site-packages/transformers/trainer.py:2216, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
2213 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
2215 with self.accelerator.accumulate(model):
-> 2216 tr_loss_step = self.training_step(model, inputs)
2218 if (
2219 args.logging_nan_inf_filter
2220 and not is_torch_xla_available()
2221 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
2222 ):
2223 # if loss is nan or inf simply add the average of previous logged losses
...
306 )
307 final_embedding = torch.where(pad_mask_expanded, torch.zeros_like(final_embedding), final_embedding)
309 final_attention_mask_4d = attention_mask.unsqueeze(1).unsqueeze(2) * attention_mask.unsqueeze(1).unsqueeze(-1)
RuntimeError: masked_scatter_: expected self and source to have same dtypes but got Half and Float
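For context, the error itself is plain PyTorch and not specific to PaliGemma: masked_scatter_ refuses to copy when the destination and source tensors have different dtypes. A minimal standalone reproduction (the tensor names here are illustrative, not taken from the model code):

import torch

dest = torch.zeros(4, dtype=torch.half)         # fp16, like the model embeddings
mask = torch.tensor([True, False, True, False])
src = torch.ones(2, dtype=torch.float)          # fp32, like the image features
dest.masked_scatter_(mask, src)
# RuntimeError: masked_scatter_: expected self and source to have same dtypes
# but got Half and Float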
I am using LoRA as well, so maybe that's the issue?
import torch
from peft import get_peft_model, LoraConfig
from transformers import (
    BitsAndBytesConfig,
    PaliGemmaForConditionalGeneration,
    Trainer,
    TrainingArguments,
)

lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM"
)

# Use a BitsAndBytes config for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0}
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
args = TrainingArguments(
    output_dir=".output",
    num_train_epochs=2,
    remove_unused_columns=False,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    warmup_steps=2,
    learning_rate=2e-5,
    weight_decay=1e-6,
    adam_beta2=0.999,
    logging_steps=100,
    # optim="adamw_hf",
    optim="paged_adamw_8bit",
    save_strategy="steps",
    save_steps=1000,
    save_total_limit=1,
    bf16=True,
    report_to=["tensorboard"],
    dataloader_pin_memory=False
)
device = "cuda"
image_token = processor.tokenizer.convert_tokens_to_ids("<image>")

def collate_fn(examples):
    texts = ["answer " + example["question"] + "\n" + example["multiple_choice_answer"] for example in examples]
    images = [example["image"].convert("RGB") for example in examples]
    tokens = processor(text=texts, images=images,
                       return_tensors="pt", padding="longest",
                       tokenize_newline_separately=False)
    labels = tokens["input_ids"].clone()
    labels[labels == processor.tokenizer.pad_token_id] = -100
    labels[labels == image_token] = -100
    tokens["labels"] = labels
    tokens = tokens.to(model.dtype).to(device)
    return tokens
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset_train,
    eval_dataset=dataset_test,
    data_collator=collate_fn
)
trainer.train()
I figured it out: I needed to call model = prepare_model_for_kbit_training(model) before model = get_peft_model(model, lora_config).
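For anyone hitting the same thing, here is a minimal sketch of the corrected setup order (same model_id, bnb_config, and lora_config as above; the description of what the helper does is my understanding of peft, not from the blog): prepare_model_for_kbit_training upcasts the non-quantized modules such as the layer norms to float32 and freezes the base model, which removes the Half/Float mismatch when the image features are scattered into the text embeddings.

from peft import prepare_model_for_kbit_training

model = PaliGemmaForConditionalGeneration.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0}
)

# The key line: upcast the non-quantized layers and freeze the base model
# before attaching the LoRA adapters.
model = prepare_model_for_kbit_training(model)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()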
chongdashu changed discussion status to closed