Kevin Fink committed
Commit bc59d39 · 1 parent: 69cfd5f
Files changed (1): app.py (+3, -2)
app.py CHANGED
```diff
@@ -83,6 +83,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     print("Loading model from checkpoint...")
     model = AutoModelForSeq2SeqLM.from_pretrained(training_args.output_dir)
 
+    tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
+
     def tokenize_function(examples):
 
         # Assuming 'text' is the input and 'target' is the expected output
```
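The point of this hunk is that the tokenizer is now created once, right after the model is restored from its checkpoint, so `tokenize_function` can close over it on every code path; previously it was only created inside the `except:` fallback (removed in the last hunk below). A minimal sketch of the resulting structure, assuming a seq2seq setup where, per the in-code comment, 'text' is the input column and 'target' the expected output; the body of `tokenize_function` is not part of this diff, so the tokenizer calls and `max_length=512` here are illustrative:

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Restore the model from a checkpoint directory (path is illustrative).
model = AutoModelForSeq2SeqLM.from_pretrained("./results")

# Load the tokenizer once, up front, so every later code path can use it.
tokenizer = AutoTokenizer.from_pretrained("google/t5-efficient-tiny-nh8")

def tokenize_function(examples):
    # Assuming 'text' is the input and 'target' is the expected output.
    model_inputs = tokenizer(
        examples["text"], max_length=512, truncation=True, padding="max_length"
    )
    labels = tokenizer(
        examples["target"], max_length=512, truncation=True, padding="max_length"
    )
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs
```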
```diff
@@ -115,7 +117,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     third_size = train_size // 3
     max_length = model.get_input_embeddings().weight.shape[0]
     try:
-        saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
+        saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
         if 'test' in saved_dataset.keys():
             print("FOUND TEST")
             # Create Trainer
```
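This hunk sits in the cache-lookup path: `load_from_disk` reads a dataset previously written with `save_to_disk` from the persistent `/data` volume, and the surrounding `try:` drops into the `except:` fallback (next hunk) when, among other things, no cached copy exists. A self-contained sketch of that pattern, with a hypothetical `hub_id` value and a narrower exception than the bare `except:` used in app.py:

```python
from datasets import load_from_disk

hub_id = "my-model"  # hypothetical value; app.py receives this as a parameter

try:
    # Reuse a dataset cached on persistent storage if a copy exists.
    saved_dataset = load_from_disk(f"/data/{hub_id.strip()}_train_dataset")
    if "test" in saved_dataset.keys():
        print("FOUND TEST")
except FileNotFoundError:
    # Nothing cached yet: app.py's bare `except:` falls back to
    # tokenizing the raw dataset from scratch.
    saved_dataset = None
```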
```diff
@@ -144,7 +146,6 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
             return
 
     except:
-        tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
         # Tokenize the dataset
         first_third = dataset['train'].select(range(third_size))
         dataset['train'] = first_third
```
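In this fallback path, `Dataset.select` takes an iterable of indices and returns a new dataset restricted to those rows, which is how training is trimmed to the first third of the split. A toy, self-contained illustration (data values invented):

```python
from datasets import Dataset

dataset = Dataset.from_dict({"text": ["a", "b", "c", "d", "e", "f"]})
third_size = len(dataset) // 3                   # 2
first_third = dataset.select(range(third_size))  # rows 0 and 1
print(first_third["text"])                       # ['a', 'b']
```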
 