---
license: apache-2.0
language:
- en
library_name: pythae
tags:
- music
---

Example fine-tuning script for GPT-2 with the Hugging Face `Trainer`:

```python
from transformers import (
    GPT2Tokenizer,
    GPT2LMHeadModel,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
)
from datasets import load_dataset
import evaluate
import numpy as np

# Load the model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# GPT-2 has no padding token by default; reuse the EOS token so that
# padding='max_length' below works
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.eos_token_id

# Load a custom dataset (CSV example); each file needs a 'text' column
dataset = load_dataset('csv', data_files={'train': 'path/to/train.csv',
                                          'test': 'path/to/test.csv'})

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length',
                     truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# For causal LM training the labels are the input ids themselves; this
# collator copies them over (mlm=False) and masks padding with -100
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Configure the training parameters
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    save_steps=10_000,
    save_total_limit=2,
    evaluation_strategy="epoch"
)

# Token-level accuracy as the evaluation metric
metric = evaluate.load('accuracy')

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    # Causal LM: logits at position i predict the token at position i + 1
    predictions = predictions[:, :-1]
    labels = labels[:, 1:]
    # Skip padding positions, which the collator marks with -100
    mask = labels != -100
    return metric.compute(predictions=predictions[mask], references=labels[mask])

# Create the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['test'],
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

# Run the training
trainer.train()
```
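As a quick sanity check after training, here is a minimal generation sketch, assuming the run above wrote checkpoints under `./results`; the `checkpoint-10000` directory name is illustrative, so substitute whatever step directory the `Trainer` actually saved:

```python
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Hypothetical checkpoint path: replace with a directory the Trainer saved
model = GPT2LMHeadModel.from_pretrained('./results/checkpoint-10000')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

# Generate a short continuation from a prompt
inputs = tokenizer('Once upon a time', return_tensors='pt')
outputs = model.generate(**inputs, max_new_tokens=50,
                         do_sample=True, top_p=0.9,
                         pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```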