Kevin Fink committed on
Commit
5c817b9
·
1 Parent(s): 22093a9
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -24,7 +24,8 @@ def fine_tune_model(model_name, dataset_name, hub_id, num_epochs, batch_size, lr
24
 
25
  # Tokenize the dataset
26
  def tokenize_function(examples):
27
- return tokenizer(examples['text'], padding="max_length", truncation=True)
 
28
 
29
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
30
 
@@ -33,12 +34,12 @@ def fine_tune_model(model_name, dataset_name, hub_id, num_epochs, batch_size, lr
33
  output_dir='./results',
34
  eval_strategy="epoch",
35
  save_strategy='epoch',
36
- learning_rate=lr**-5,
37
  per_device_train_batch_size=batch_size,
38
  per_device_eval_batch_size=batch_size,
39
  num_train_epochs=num_epochs,
40
  weight_decay=0.01,
41
- gradient_accumulation_steps=grad**-1,
42
  load_best_model_at_end=True,
43
  metric_for_best_model="accuracy",
44
  greater_is_better=True,
@@ -49,7 +50,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, num_epochs, batch_size, lr
49
  fp16=True,
50
  lr_scheduler_type='cosine',
51
  )
52
-
53
  # Create Trainer
54
  trainer = Trainer(
55
  model=model,
 
24
 
25
  # Tokenize the dataset
26
  def tokenize_function(examples):
27
+ max_length = 256
28
+ return tokenizer(examples['text'], padding=max_length, truncation=True)
29
 
30
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
31
 
 
34
  output_dir='./results',
35
  eval_strategy="epoch",
36
  save_strategy='epoch',
37
+ learning_rate=lr*0.00001,
38
  per_device_train_batch_size=batch_size,
39
  per_device_eval_batch_size=batch_size,
40
  num_train_epochs=num_epochs,
41
  weight_decay=0.01,
42
+ gradient_accumulation_steps=grad*0.1,
43
  load_best_model_at_end=True,
44
  metric_for_best_model="accuracy",
45
  greater_is_better=True,
 
50
  fp16=True,
51
  lr_scheduler_type='cosine',
52
  )
53
+
54
  # Create Trainer
55
  trainer = Trainer(
56
  model=model,