Sofa321 committed on
Commit
ba0cabc
·
verified ·
1 Parent(s): af030a0

Create train-model.py

Browse files
Files changed (1) hide show
  1. train-model.py +35 -0
train-model.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Fine-tune the IndoBERT checkpoint for two-class sequence classification on a
# local CSV file, evaluating once per epoch on a held-out slice of that file,
# then save the resulting model and tokenizer to ./fine_tuned_model.
#
# Expects dataset.csv to provide a text column named "pertanyaan" and a
# column named "label" (presumably integer class ids 0/1 -- TODO confirm
# against the actual CSV).
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

MODEL_NAME = "indobenchmark/indobert-base-p2"

# Loading a single CSV through `data_files` yields a DatasetDict that only
# contains a "train" split; there is no "validation" split to index later.
dataset = load_dataset("csv", data_files="dataset.csv")

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


def preprocess(data):
    """Tokenize the ``pertanyaan`` column of a batch of rows.

    Args:
        data: A batch (mapping of column name to list of values) as passed by
            ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the batch, padded to the maximum length and
        truncated where the text is too long.
    """
    return tokenizer(data["pertanyaan"], padding="max_length", truncation=True)


dataset = dataset.map(preprocess, batched=True)
dataset = dataset.rename_column("label", "labels")

# Carve an evaluation set out of the only available split. The original script
# indexed dataset["validation"], which does not exist for a single-file CSV
# load and fails with a KeyError before training starts. A fixed seed keeps
# the train/eval partition reproducible between runs.
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)

# Apply the torch format after splitting so both resulting splits carry it.
dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])

model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy`; confirm against the installed version before upgrading.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    save_total_limit=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    # train_test_split names the held-out portion "test".
    eval_dataset=dataset["test"],
)

trainer.train()
model.save_pretrained("./fine_tuned_model")
# Save the tokenizer next to the weights so the output directory can be loaded
# on its own with AutoTokenizer.from_pretrained / AutoModel*.from_pretrained.
tokenizer.save_pretrained("./fine_tuned_model")