Kevin Fink committed on
Commit d86b87f · 1 Parent(s): d085a88
Files changed (1)
  1. app.py +10 -18
app.py CHANGED
@@ -1,25 +1,12 @@
 import spaces
 import gradio as gr
-from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM, TrainerCallback
+from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM
 from transformers import DataCollatorForSeq2Seq
 from datasets import load_dataset
 import traceback
 from huggingface_hub import login
 from peft import get_peft_model, LoraConfig
 
-class LoggingCallback(TrainerCallback):
-    def on_step_end(self, args, state, control, kwargs):
-        # Log the learning rate
-        current_lr = state.optimizer.param_groups[0]['lr']
-        print(f"Current Learning Rate: {current_lr}")
-
-    def on_epoch_end(self, args, state, control, kwargs):
-        # Log the error rate (assuming you have a metric to calculate it)
-        # Here we assume you have a way to get the validation loss
-        if state.best_metric is not None:
-            error_rate = 1 - state.best_metric  # Assuming best_metric is accuracy
-            print(f"Current Error Rate: {error_rate:.4f}")
-
 @spaces.GPU(duration=120)
 def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
     try:
@@ -38,7 +25,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         model = get_peft_model(model, lora_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-        max_length = 128
+        max_length = 256
 
         # Tokenize the dataset
         def tokenize_function(examples):
@@ -71,10 +58,9 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         training_args = TrainingArguments(
            output_dir='./results',
            eval_strategy="epoch",
-           save_strategy='epoch',
            learning_rate=lr*0.000001,
            per_device_train_batch_size=int(batch_size),
-           per_device_eval_batch_size=1,
+           per_device_eval_batch_size=int(batch_size),
            num_train_epochs=int(num_epochs),
            weight_decay=0.01,
            gradient_accumulation_steps=int(grad),
@@ -87,8 +73,14 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
            hub_model_id=hub_id.strip(),
            fp16=True,
            #lr_scheduler_type='cosine',
+           save_steps=500,  # Save checkpoint every 500 steps
+           save_total_limit=3,
         )
-
+        # Check if a checkpoint exists and load it
+        if os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir):
+            print("Loading model from checkpoint...")
+            model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)
+
         # Create Trainer
         trainer = Trainer(
            model=model,
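
Two details of the new checkpoint block are worth noting: the hunks shown never add an `import os` (it may already exist elsewhere in app.py), and `AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)` expects weights directly under ./results, whereas the new save_steps/save_total_limit settings write them to checkpoint-<step> subdirectories. Below is a minimal sketch of how the same resume-from-checkpoint intent is usually expressed with the Trainer API; `model`, `tokenizer`, `training_args`, and `tokenized_datasets` are assumed to already exist as in the committed fine_tune_model(), and the snippet is an illustration, not the committed code.

import os

from transformers import DataCollatorForSeq2Seq, Trainer

# Assumes `model`, `tokenizer`, `training_args`, and a tokenized dataset dict
# (here called `tokenized_datasets`, with "train" and "validation" splits)
# already exist, as in the committed fine_tune_model().
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
)

# save_steps=500 / save_total_limit=3 place checkpoints in
# ./results/checkpoint-<step>/; resume from the most recent one if any exist,
# rather than reloading weights from output_dir itself.
has_checkpoint = os.path.isdir(training_args.output_dir) and any(
    name.startswith("checkpoint-") for name in os.listdir(training_args.output_dir)
)
trainer.train(resume_from_checkpoint=True if has_checkpoint else None)

With resume_from_checkpoint=True, Trainer picks up the latest checkpoint in output_dir (including optimizer and scheduler state), so the model does not need to be re-instantiated by hand before training continues.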