bumpe committed on
Commit
f2cb073
1 Parent(s): ba51c28

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +54 -3
README.md CHANGED
@@ -1,3 +1,54 @@
1
- ---
2
- license: agpl-3.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ library_name: pythae
6
+ tags:
7
+ - music
8
+ ---
9
+ ---
10
+ license: agpl-3.0
11
+ ---
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel, Trainer, TrainingArguments
12
+ from datasets import load_dataset
13
+ import numpy as np
14
+
15
+ # Carica il modello e il tokenizer
16
+ tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
17
+ model = GPT2LMHeadModel.from_pretrained('gpt2')
18
+
19
+ # Carica un dataset personalizzato (esempio con CSV)
20
+ dataset = load_dataset('csv', data_files={'train': 'path/to/train.csv', 'test': 'path/to/test.csv'})
21
+
22
+ # Tokenizzazione del dataset
23
+ def tokenize_function(examples):
24
+ return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128)
25
+
26
+ tokenized_datasets = dataset.map(tokenize_function, batched=True)
27
+
28
+ # Configura i parametri di addestramento
29
+ training_args = TrainingArguments(
30
+ output_dir='./results',
31
+ num_train_epochs=3,
32
+ per_device_train_batch_size=4,
33
+ save_steps=10_000,
34
+ save_total_limit=2,
35
+ evaluation_strategy="epoch"
36
+ )
37
+
38
+ # Funzione per calcolare le metriche
39
+ def compute_metrics(eval_pred):
40
+ logits, labels = eval_pred
41
+ predictions = np.argmax(logits, axis=-1)
42
+ return metric.compute(predictions=predictions, references=labels)
43
+
44
+ # Crea il trainer
45
+ trainer = Trainer(
46
+ model=model,
47
+ args=training_args,
48
+ train_dataset=tokenized_datasets['train'],
49
+ eval_dataset=tokenized_datasets['test'],
50
+ compute_metrics=compute_metrics
51
+ )
52
+
53
+ # Esegui l'addestramento
54
+ trainer.train()