Dec 10, 2023

I am trying to train with my dataset, but I am getting the error below. Could someone please help?
"---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [10], in <cell line: 9>()
2 peft_trainer = Trainer(
3 model=peft_model,
4 args=peft_training_args,
5 train_dataset=training_dataset,
6 )
8 # Start PEFT training
----> 9 peft_trainer.train()
11 # Save the PEFT-trained model
12 peft_model.save_pretrained("C:/D drive/vivek data/Mtech/Semester4/FbtExamples/peft_fine_tuned_model")

File ~\Anaconda3\lib\site-packages\transformers\trainer.py:1530, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1528 hf_hub_utils.enable_progress_bars()
1529 else:
-> 1530 return inner_training_loop(
1531 args=args,
1532 resume_from_checkpoint=resume_from_checkpoint,
1533 trial=trial,
1534 ignore_keys_for_eval=ignore_keys_for_eval,
1535 )

File ~\Anaconda3\lib\site-packages\accelerate\utils\memory.py:136, in find_executable_batch_size.<locals>.decorator(*args, **kwargs)
134 raise RuntimeError("No executable batch size found, reached zero.")
135 try:
--> 136 return function(batch_size, *args, **kwargs)
137 except Exception as e:
138 if should_reduce_batch_size(e):

File ~\Anaconda3\lib\site-packages\transformers\trainer.py:1844, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1841 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
1843 with self.accelerator.accumulate(model):
-> 1844 tr_loss_step = self.training_step(model, inputs)
1846 if (
1847 args.logging_nan_inf_filter
1848 and not is_torch_tpu_available()
1849 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
1850 ):
1851 # if loss is nan or inf simply add the average of previous logged losses
1852 tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File ~\Anaconda3\lib\site-packages\transformers\trainer.py:2701, in Trainer.training_step(self, model, inputs)
2698 return loss_mb.reduce_mean().detach().to(self.args.device)
2700 with self.compute_loss_context_manager():
-> 2701 loss = self.compute_loss(model, inputs)
2703 if self.args.n_gpu > 1:
2704 loss = loss.mean() # mean() to average on multi-gpu parallel training

File ~\Anaconda3\lib\site-packages\transformers\trainer.py:2724, in Trainer.compute_loss(self, model, inputs, return_outputs)
2722 else:
2723 labels = None
-> 2724 outputs = model(**inputs)
2725 # Save past state if it exists
2726 # TODO: this needs to be fixed and made cleaner later.
2727 if self.args.past_index >= 0:

File ~\Anaconda3\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~\Anaconda3\lib\site-packages\peft\peft_model.py:1232, in PeftModelForSeq2SeqLM.forward(self, input_ids, attention_mask, inputs_embeds, decoder_input_ids, decoder_attention_mask, decoder_inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)
1230 peft_config = self.active_peft_config
1231 if not peft_config.is_prompt_learning:
-> 1232 return self.base_model(
1233 input_ids=input_ids,
1234 attention_mask=attention_mask,
1235 inputs_embeds=inputs_embeds,
1236 decoder_input_ids=decoder_input_ids,
1237 decoder_attention_mask=decoder_attention_mask,
1238 decoder_inputs_embeds=decoder_inputs_embeds,
1239 labels=labels,
1240 output_attentions=output_attentions,
1241 output_hidden_states=output_hidden_states,
1242 return_dict=return_dict,
1243 **kwargs,
1244 )
1246 batch_size = _get_batch_size(input_ids, inputs_embeds)
1247 if decoder_attention_mask is not None:
1248 # concat prompt attention mask

File ~\Anaconda3\lib\site-packages\torch\nn\modules\module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194 return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~\Anaconda3\lib\site-packages\peft\tuners\tuners_utils.py:108, in BaseTuner.forward(self, *args, **kwargs)
107 def forward(self, *args: Any, **kwargs: Any):
--> 108 return self.model.forward(*args, **kwargs)

TypeError: forward() got an unexpected keyword argument 'decoder_input_ids'"

ybelkada

Dec 10, 2023

Hi @vivkhandelwal, thanks for the issue. Can you share the full script you are using?

vivkhandelwal

Dec 11, 2023

Hi, here is the script. Please keep in mind that I am very new to LLMs and may have made some mistakes. Your help is highly appreciated.
import pandas as pd
import os
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM, AdamW, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model, TaskType

def print_number_of_trainable_model_parameters(model):
    """Summarize how many of a model's parameters are trainable.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where each parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    Returns:
        A three-line string with the trainable parameter count, the total
        parameter count, and the trainable percentage (two decimals).
    """
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_model_params += count
        if param.requires_grad:
            trainable_model_params += count

    # A model without any parameters would otherwise raise ZeroDivisionError.
    if all_model_params:
        percentage = 100 * trainable_model_params / all_model_params
    else:
        percentage = 0.0

    return (
        f"Trainable model parameters: {trainable_model_params}\n"
        f"All model parameters: {all_model_params}\n"
        f"Percentage of trainable model parameters: {percentage:.2f}%"
    )
# Load the tokenizer and the base causal-LM. The model is requested in 8-bit
# (load_in_8bit=True) with device placement left to device_map="auto".
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-alpha")
original_model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-alpha",device_map="auto",load_in_8bit=True)

# Print the number of trainable parameters in the original model

print(print_number_of_trainable_model_parameters(original_model))

# Load prompts and file names from Excel file

excel_file_path = "/content/FbtExamples/Fbtprompt.xlsx" # Change this to the appropriate file path
df = pd.read_excel(excel_file_path)

# Specify the folder where XML files are located

xml_folder = "/content/FbtExamples" # Change this to the appropriate folder path
# Preview of the first rows; this only displays output as the last expression
# of a notebook cell and has no visible effect in a plain script.
df.head()
class CustomDataset(Dataset):
    """Prompt/XML pairs tokenized for causal-LM fine-tuning.

    Each row of ``dataframe`` supplies a ``prompt`` and the ``file_name`` of
    an XML file inside ``xml_folder``. An item is a single token sequence
    ``prompt + "\\n" + xml + eos`` so that ``labels`` line up position by
    position with ``input_ids``, which is what a causal LM's loss expects.
    Prompt and padding positions are set to -100 in ``labels`` so only the
    XML completion contributes to the loss.
    """

    def __init__(self, dataframe, tokenizer, xml_folder, max_length=2048):
        """Store the data source and tokenization settings.

        Args:
            dataframe: Table with ``prompt`` and ``file_name`` columns,
                indexed positionally through ``.iloc``.
            tokenizer: Callable tokenizer returning a mapping with an
                ``input_ids`` list when called with ``truncation`` and
                ``max_length``. ``eos_token`` and ``pad_token_id`` are used
                when present.
            xml_folder: Directory that contains the XML files.
            max_length: Fixed length of every returned sequence. Defaults to
                2048, the length that was previously hard-coded in the
                tokenizer calls (the old default of 512 was never read).
        """
        self.data = dataframe
        self.tokenizer = tokenizer
        self.xml_folder = xml_folder
        self.max_length = max_length

    def __len__(self):
        """Return the number of rows in the underlying dataframe."""
        return len(self.data)

    def __getitem__(self, idx):
        """Build one training example.

        Args:
            idx: Positional row index.

        Returns:
            A dict with 1-D ``torch.long`` tensors of length ``max_length``:
            ``input_ids``, ``attention_mask`` (1 for real tokens, 0 for
            padding) and ``labels`` (-100 on prompt and padding positions).
        """
        # Local import: the file only imports names from torch.utils.data.
        import torch

        prompt = self.data['prompt'].iloc[idx]
        xml_filename = self.data['file_name'].iloc[idx]
        xml_filepath = os.path.join(self.xml_folder, xml_filename)

        with open(xml_filepath, 'r', encoding='utf-8') as file:
            xml_code = file.read()

        prompt_text = prompt + "\n"
        eos_token = getattr(self.tokenizer, "eos_token", None) or ""

        # Tokenize the prompt alone only to learn how many leading tokens to
        # exclude from the loss.
        # NOTE(review): assumes the tokenizer does not append an EOS token on
        # its own; if it does, the prompt length is over-counted by one.
        prompt_ids = self.tokenizer(
            prompt_text, truncation=True, max_length=self.max_length
        )["input_ids"]
        full_ids = list(
            self.tokenizer(
                prompt_text + xml_code + eos_token,
                truncation=True,
                max_length=self.max_length,
            )["input_ids"]
        )

        real_len = len(full_ids)
        prompt_len = min(len(prompt_ids), real_len)
        pad_len = self.max_length - real_len

        # The pad id value is irrelevant to the loss (masked everywhere), so
        # fall back to 0 when the tokenizer defines no pad token.
        pad_id = getattr(self.tokenizer, "pad_token_id", None)
        if pad_id is None:
            pad_id = 0

        # If the prompt alone fills max_length, every label is -100 and the
        # example contributes nothing to training.
        label_ids = [-100] * prompt_len + full_ids[prompt_len:] + [-100] * pad_len

        return {
            "input_ids": torch.tensor(full_ids + [pad_id] * pad_len, dtype=torch.long),
            "attention_mask": torch.tensor([1] * real_len + [0] * pad_len, dtype=torch.long),
            "labels": torch.tensor(label_ids, dtype=torch.long),
        }

# Configure PEFT

# The base model was loaded in 8-bit, so prepare it for adapter training
# before injecting LoRA layers (see the PEFT quantization guide).
from peft import prepare_model_for_kbit_training

original_model = prepare_model_for_kbit_training(original_model)

lora_config = LoraConfig(
    r=16,  # Rank
    lora_alpha=16,
    target_modules=["q_proj", "k_proj"],
    lora_dropout=0.05,
    bias="none",
    # Must be CAUSAL_LM for an AutoModelForCausalLM base. A seq2seq task type
    # wraps the model in PeftModelForSeq2SeqLM, which forwards
    # decoder_input_ids and triggers
    # "forward() got an unexpected keyword argument 'decoder_input_ids'".
    task_type=TaskType.CAUSAL_LM,
)

# Wrap the model with PEFT (once: calling get_peft_model a second time on the
# same base model would inject the adapters again).

peft_model = get_peft_model(original_model, lora_config)

# Print the number of trainable parameters in the PEFT model

print(print_number_of_trainable_model_parameters(peft_model))

# Training dataset

training_dataset = CustomDataset(df, tokenizer, xml_folder)

# Define the output directory for PEFT training

peft_output_dir = "/content/peft_output" # Change this to the appropriate folder path

# Define PEFT training arguments

peft_training_args = TrainingArguments(
    output_dir=peft_output_dir,
    # Retry with a smaller batch size if the starting one runs out of memory.
    auto_find_batch_size=True,
    learning_rate=1e-3,  # Higher learning rate than full fine-tuning.
    num_train_epochs=1,
    logging_steps=1,
    # A positive max_steps overrides num_train_epochs, so this run stops after
    # a single optimizer step; remove it to train for the full epoch.
    max_steps=1,
    per_device_train_batch_size=4,  # Starting per-device batch size
)

# Create the Trainer for PEFT

peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=training_dataset,
)

# Start PEFT training

peft_trainer.train()

# Save the PEFT-trained model

peft_model.save_pretrained("/content/peft_fine_tuned_model") # Change this to the appropriate folder path

HuggingFaceH4
/

zephyr-7b-alpha

How to apply PEFT in this model

Print the number of trainable parameters in the original model

Load prompts and file names from Excel file

Specify the folder where XML files are located

Wrap the model with PEFT

Print the number of trainable parameters

Wrap the model with PEFT

Print the number of trainable parameters in the PEFT model

Training dataset

Define the output directory for PEFT training

Define PEFT training arguments

Create the Trainer for PEFT

Start PEFT training

Save the PEFT-trained model