Kevin Fink committed on
Commit d86b87f · 1 Parent(s): d085a88
Files changed (1)
  1. app.py +10 -18
app.py CHANGED
@@ -1,25 +1,12 @@
 import spaces
 import gradio as gr
-from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM, TrainerCallback
+from transformers import Trainer, TrainingArguments, AutoTokenizer, AutoModelForSeq2SeqLM
 from transformers import DataCollatorForSeq2Seq
 from datasets import load_dataset
 import traceback
 from huggingface_hub import login
 from peft import get_peft_model, LoraConfig
 
-class LoggingCallback(TrainerCallback):
-    def on_step_end(self, args, state, control, kwargs):
-        # Log the learning rate
-        current_lr = state.optimizer.param_groups[0]['lr']
-        print(f"Current Learning Rate: {current_lr}")
-
-    def on_epoch_end(self, args, state, control, kwargs):
-        # Log the error rate (assuming you have a metric to calculate it)
-        # Here we assume you have a way to get the validation loss
-        if state.best_metric is not None:
-            error_rate = 1 - state.best_metric  # Assuming best_metric is accuracy
-            print(f"Current Error Rate: {error_rate:.4f}")
-
 @spaces.GPU(duration=120)
 def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch_size, lr, grad):
     try:
@@ -38,7 +25,7 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         model = get_peft_model(model, lora_config)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-        max_length = 128
+        max_length = 256
 
         # Tokenize the dataset
         def tokenize_function(examples):
@@ -71,10 +58,9 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         training_args = TrainingArguments(
            output_dir='./results',
            eval_strategy="epoch",
-           save_strategy='epoch',
            learning_rate=lr*0.000001,
            per_device_train_batch_size=int(batch_size),
-           per_device_eval_batch_size=1,
+           per_device_eval_batch_size=int(batch_size),
            num_train_epochs=int(num_epochs),
            weight_decay=0.01,
            gradient_accumulation_steps=int(grad),
@@ -87,8 +73,14 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
            hub_model_id=hub_id.strip(),
            fp16=True,
            #lr_scheduler_type='cosine',
+           save_steps=500,  # Save checkpoint every 500 steps
+           save_total_limit=3,
         )
-
+        # Check if a checkpoint exists and load it
+        if os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir):
+            print("Loading model from checkpoint...")
+            model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)
+
         # Create Trainer
         trainer = Trainer(
            model=model,
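
Two details of the new checkpoint block are worth noting: the hunks shown never add an `import os` (it may already exist elsewhere in app.py), and `AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)` expects weights directly under ./results, whereas the new save_steps/save_total_limit settings write them to checkpoint-<step> subdirectories. Below is a minimal sketch of how the same resume-from-checkpoint intent is usually expressed with the Trainer API; `model`, `tokenizer`, `training_args`, and `tokenized_datasets` are assumed to already exist as in the committed fine_tune_model(), and the snippet is an illustration, not the committed code.

import os

from transformers import DataCollatorForSeq2Seq, Trainer

# Assumes `model`, `tokenizer`, `training_args`, and a tokenized dataset dict
# (here called `tokenized_datasets`, with "train" and "validation" splits)
# already exist, as in the committed fine_tune_model().
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
)

# save_steps=500 / save_total_limit=3 place checkpoints in
# ./results/checkpoint-<step>/; resume from the most recent one if any exist,
# rather than reloading weights from output_dir itself.
has_checkpoint = os.path.isdir(training_args.output_dir) and any(
    name.startswith("checkpoint-") for name in os.listdir(training_args.output_dir)
)
trainer.train(resume_from_checkpoint=True if has_checkpoint else None)

With resume_from_checkpoint=True, Trainer picks up the latest checkpoint in output_dir (including optimizer and scheduler state), so the model does not need to be re-instantiated by hand before training continues.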