Spaces:

shorecode
/

gradio-3

Sleeping

Kevin Fink committed on Dec 5, 2024

Commit

8bef298

1 Parent(s): 9a0b862

init

Files changed (1) hide show

app.py CHANGED Viewed

@@ -40,7 +40,16 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
         # Tokenize the dataset
         def tokenize_function(examples):
             # Pre-change version: tokenizes only the 'text' column of the batch,
             # passing max_length=32 with truncation enabled. No "labels" entry is
             # added to the returned encoding.
             max_length = 32
-            return tokenizer(examples['text'], max_length=max_length, truncation=True)
         tokenized_datasets = dataset.map(tokenize_function, batched=True)

         # Tokenize the dataset
         def tokenize_function(examples):
             max_length = 32
+            # Assuming 'text' is the input and 'target' is the expected output
+            model_inputs = tokenizer(examples['text'], max_length=max_length, truncation=True)
+            # Setup the decoder input IDs (shifted right)
+            with tokenizer.as_target_tokenizer():
+                labels = tokenizer(examples['target'], max_length=max_length, truncation=True)
+            # Add labels to the model inputs
+            model_inputs["labels"] = labels["input_ids"]
+            return model_inputs
         tokenized_datasets = dataset.map(tokenize_function, batched=True)