Kevin Fink
committed on
Commit
·
5c817b9
1
Parent(s):
22093a9
init
Browse files
app.py
CHANGED
@@ -24,7 +24,8 @@ def fine_tune_model(model_name, dataset_name, hub_id, num_epochs, batch_size, lr
|
|
24 |
|
25 |
# Tokenize the dataset
|
26 |
def tokenize_function(examples):
|
27 |
-
|
|
|
28 |
|
29 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
30 |
|
@@ -33,12 +34,12 @@ def fine_tune_model(model_name, dataset_name, hub_id, num_epochs, batch_size, lr
|
|
33 |
output_dir='./results',
|
34 |
eval_strategy="epoch",
|
35 |
save_strategy='epoch',
|
36 |
-
learning_rate=lr
|
37 |
per_device_train_batch_size=batch_size,
|
38 |
per_device_eval_batch_size=batch_size,
|
39 |
num_train_epochs=num_epochs,
|
40 |
weight_decay=0.01,
|
41 |
-
gradient_accumulation_steps=grad
|
42 |
load_best_model_at_end=True,
|
43 |
metric_for_best_model="accuracy",
|
44 |
greater_is_better=True,
|
@@ -49,7 +50,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, num_epochs, batch_size, lr
|
|
49 |
fp16=True,
|
50 |
lr_scheduler_type='cosine',
|
51 |
)
|
52 |
-
|
53 |
# Create Trainer
|
54 |
trainer = Trainer(
|
55 |
model=model,
|
|
|
24 |
|
25 |
# Tokenize the dataset
|
26 |
def tokenize_function(examples):
|
27 |
+
max_length = 256
|
28 |
+
return tokenizer(examples['text'], padding=max_length, truncation=True)
|
29 |
|
30 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
31 |
|
|
|
34 |
output_dir='./results',
|
35 |
eval_strategy="epoch",
|
36 |
save_strategy='epoch',
|
37 |
+
learning_rate=lr*0.00001,
|
38 |
per_device_train_batch_size=batch_size,
|
39 |
per_device_eval_batch_size=batch_size,
|
40 |
num_train_epochs=num_epochs,
|
41 |
weight_decay=0.01,
|
42 |
+
gradient_accumulation_steps=grad*0.1,
|
43 |
load_best_model_at_end=True,
|
44 |
metric_for_best_model="accuracy",
|
45 |
greater_is_better=True,
|
|
|
50 |
fp16=True,
|
51 |
lr_scheduler_type='cosine',
|
52 |
)
|
53 |
+
|
54 |
# Create Trainer
|
55 |
trainer = Trainer(
|
56 |
model=model,
|