Kevin Fink committed on
Commit 7ff0a99 · 1 Parent(s): 6757426
Files changed (1)
  1. app.py +4 -6
app.py CHANGED
@@ -90,7 +90,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         examples['text'],
         max_length=max_length, # Set to None for dynamic padding
         truncation=True,
-        padding='max_length',
+        padding='longest',
         return_tensors='pt',
     )
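Note: `padding='max_length'` pads every example out to the full `max_length`, while `padding='longest'` pads only to the longest sequence in the current batch, which avoids spending memory and compute on pad tokens for short inputs. A minimal sketch of the difference, assuming a standard Hugging Face tokenizer (the sample texts and the 128 cap are illustrative, not from app.py):

    from transformers import AutoTokenizer

    # Same fallback checkpoint app.py loads in its except branch.
    tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
    texts = ['short input', 'a noticeably longer example input sentence']

    # Pads both sequences all the way out to max_length.
    fixed = tokenizer(texts, max_length=128, truncation=True,
                      padding='max_length', return_tensors='pt')

    # Pads only to the longest sequence actually present in the batch.
    longest = tokenizer(texts, max_length=128, truncation=True,
                        padding='longest', return_tensors='pt')

    print(fixed['input_ids'].shape)    # torch.Size([2, 128])
    print(longest['input_ids'].shape)  # torch.Size([2, <batch max>])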
@@ -99,8 +99,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         examples['target'],
         max_length=max_length, # Set to None for dynamic padding
         truncation=True,
-        padding='max_length',
-        text_target=examples['target'],
+        padding='longest',
+        #text_target=examples['target'],
         return_tensors='pt',
     )
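Note: the commented-out `text_target` kwarg was redundant in this call, since `examples['target']` is already the positional text argument, so the same strings were being tokenized twice. For reference, the conventional single-call seq2seq pattern passes inputs positionally and targets via `text_target`, which fills the `labels` field (a sketch of standard transformers usage, not code from this commit):

    # Sketch: tokenize inputs and targets in one call.
    model_inputs = tokenizer(
        examples['text'],                # -> input_ids / attention_mask
        text_target=examples['target'],  # -> labels
        max_length=max_length,
        truncation=True,
        padding='longest',
        return_tensors='pt',
    )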
@@ -131,9 +131,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         )
     except:
         tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
-    # Tokenize the dataset
-
-
+    # Tokenize the dataset
     first_half = dataset['train'].select(range(half_size))
     tokenized_half = tokenize_function(first_half)
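Note: the recurring "# Set to None for dynamic padding" comment points at the other common setup, which this commit moves toward: skip fixed padding at tokenize time and let a collator pad each batch dynamically. A hedged sketch of that standard transformers pattern (not part of this commit):

    from transformers import DataCollatorForSeq2Seq

    # Pads each training batch to its own longest sequence at load time,
    # so tokenize_function could drop the padding argument entirely.
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
    # Then pass data_collator=data_collator to the Trainer.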
 
 