Sofa321 committed on
Commit
74a8d0e
·
verified ·
1 Parent(s): ba0cabc

Delete train-model.py

Browse files
Files changed (1) hide show
  1. train-model.py +0 -35
train-model.py DELETED
@@ -1,35 +0,0 @@
1
- from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
2
- from datasets import load_dataset
3
-
4
- MODEL_NAME = "indobenchmark/indobert-base-p2"
5
- dataset = load_dataset("csv", data_files="dataset.csv")
6
-
7
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
8
-
9
- def preprocess(data):
10
- return tokenizer(data['pertanyaan'], padding="max_length", truncation=True)
11
-
12
- dataset = dataset.map(preprocess, batched=True)
13
- dataset = dataset.rename_column("label", "labels")
14
- dataset.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
15
-
16
- model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
17
-
18
- training_args = TrainingArguments(
19
- output_dir="./results",
20
- evaluation_strategy="epoch",
21
- learning_rate=2e-5,
22
- per_device_train_batch_size=16,
23
- num_train_epochs=3,
24
- save_total_limit=2
25
- )
26
-
27
- trainer = Trainer(
28
- model=model,
29
- args=training_args,
30
- train_dataset=dataset['train'],
31
- eval_dataset=dataset['validation']
32
- )
33
-
34
- trainer.train()
35
- model.save_pretrained("./fine_tuned_model")