Kevin Fink committed on
Commit
8bef298
·
1 Parent(s): 9a0b862
Files changed (1) hide show
  1. app.py +10 -1
app.py CHANGED
@@ -40,7 +40,16 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
40
  # Tokenize the dataset
41
  def tokenize_function(examples):
42
  max_length = 32
43
- return tokenizer(examples['text'], max_length=max_length, truncation=True)
 
 
 
 
 
 
 
 
 
44
 
45
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
46
 
 
40
  # Tokenize the dataset
41
  def tokenize_function(examples):
42
  max_length = 32
43
+ # Assuming 'text' is the input and 'target' is the expected output
44
+ model_inputs = tokenizer(examples['text'], max_length=max_length, truncation=True)
45
+
46
+ # Setup the decoder input IDs (shifted right)
47
+ with tokenizer.as_target_tokenizer():
48
+ labels = tokenizer(examples['target'], max_length=max_length, truncation=True)
49
+
50
+ # Add labels to the model inputs
51
+ model_inputs["labels"] = labels["input_ids"]
52
+ return model_inputs
53
 
54
  tokenized_datasets = dataset.map(tokenize_function, batched=True)
55