"""Fine-tune bert-base-uncased on SQuAD v1.1 for extractive question answering.

Pipeline: load SQuAD -> tokenize question/context pairs with a sliding
window -> derive start/end token labels from the character-level answer
spans -> train with the Hugging Face `Trainer`.
"""

from datasets import load_dataset
from transformers import (
    AutoModelForQuestionAnswering,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)
import torch  # noqa: F401  (kept from original; Trainer uses torch under the hood)

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModelForQuestionAnswering.from_pretrained("bert-base-uncased")

# BUGFIX: the original assigned the bare string ("squad") and then mapped over
# an undefined name `dataset`; actually load the dataset.
dataset = load_dataset("squad")


def tokenize_function(examples):
    """Tokenize question/context pairs and attach answer-span labels.

    Long contexts are split into overlapping windows (stride 128); each
    window becomes its own training feature. For every feature we convert
    the character-level answer span into start/end *token* indices, which
    `AutoModelForQuestionAnswering` needs to compute a loss. Features whose
    window does not contain the answer are labeled with the [CLS] index.
    """
    # BUGFIX: SQuAD's column is "question" (singular), not "questions".
    tokenized = tokenizer(
        examples["question"],
        examples["context"],
        truncation="only_second",  # never truncate the question, only the context
        max_length=512,
        padding="max_length",
        stride=128,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        return_attention_mask=True,
        return_token_type_ids=True,
    )

    # One example can yield several features; map each feature back to it.
    sample_mapping = tokenized.pop("overflow_to_sample_mapping")
    # Offsets are only needed here to locate the answer tokens; drop them
    # afterwards so the model does not receive an unexpected input.
    offset_mapping = tokenized.pop("offset_mapping")

    tokenized["start_positions"] = []
    tokenized["end_positions"] = []

    for i, offsets in enumerate(offset_mapping):
        input_ids = tokenized["input_ids"][i]
        cls_index = input_ids.index(tokenizer.cls_token_id)
        # sequence_ids: None for special tokens, 0 for question, 1 for context.
        sequence_ids = tokenized.sequence_ids(i)

        answers = examples["answers"][sample_mapping[i]]
        if len(answers["answer_start"]) == 0:
            # Unanswerable (defensive; SQuAD v1.1 always has an answer).
            tokenized["start_positions"].append(cls_index)
            tokenized["end_positions"].append(cls_index)
            continue

        start_char = answers["answer_start"][0]
        end_char = start_char + len(answers["text"][0])

        # First and last token of the context within this window.
        token_start = 0
        while sequence_ids[token_start] != 1:
            token_start += 1
        token_end = len(input_ids) - 1
        while sequence_ids[token_end] != 1:
            token_end -= 1

        if not (
            offsets[token_start][0] <= start_char
            and offsets[token_end][1] >= end_char
        ):
            # The answer lies outside this window: label with [CLS].
            tokenized["start_positions"].append(cls_index)
            tokenized["end_positions"].append(cls_index)
        else:
            # Walk inward to the exact token boundaries of the answer span.
            while (
                token_start < len(offsets)
                and offsets[token_start][0] <= start_char
            ):
                token_start += 1
            tokenized["start_positions"].append(token_start - 1)
            while offsets[token_end][1] >= end_char:
                token_end -= 1
            tokenized["end_positions"].append(token_end + 1)

    return tokenized


# BUGFIX: with return_overflowing_tokens the mapped output has more rows than
# the input, so *every* original column (including "answers", which the
# original list omitted) must be removed or `map` raises a length mismatch.
tokenized_datasets = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)

training_args = TrainingArguments(
    output_dir="./results",  # BUGFIX: required argument, was missing
    per_device_train_batch_size=8,
    num_train_epochs=3,
    logging_dir="./logs",
)


def compute_metrics(p):
    """Placeholder: returns no metrics.

    NOTE(review): proper SQuAD evaluation (exact-match / F1) needs
    post-processing of start/end logits back to text spans; left as a
    no-op to preserve the original behavior.
    """
    return {}


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()