Kevin Fink committed on
Commit
22d6887
·
1 Parent(s): 6cbdc2a
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -115,15 +115,16 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
115
  )
116
 
117
  # Setup the decoder input IDs (shifted right)
118
- labels = tokenizer(
119
- examples['target'],
120
- max_length=max_length, # Set to None for dynamic padding
121
- truncation=True,
122
- padding='max_length',
123
- #text_target=examples['target'],
124
- #return_tensors='pt',
125
- #padding=True,
126
- )
 
127
  #labels["input_ids"] = [
128
  # [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
129
  #]
@@ -150,7 +151,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
150
  train_dataset=train_dataset,
151
  eval_dataset=saved_test_dataset,
152
  #compute_metrics=compute_metrics,
153
- #data_collator=data_collator,
154
  #processing_class=tokenizer,
155
  )
156
 
 
115
  )
116
 
117
  # Setup the decoder input IDs (shifted right)
118
+ with tokenizer.as_target_tokenizer():
119
+ labels = tokenizer(
120
+ examples['target'],
121
+ max_length=max_length, # Set to None for dynamic padding
122
+ truncation=True,
123
+ padding='max_length',
124
+ #text_target=examples['target'],
125
+ #return_tensors='pt',
126
+ #padding=True,
127
+ )
128
  #labels["input_ids"] = [
129
  # [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
130
  #]
 
151
  train_dataset=train_dataset,
152
  eval_dataset=saved_test_dataset,
153
  #compute_metrics=compute_metrics,
154
+ data_collator=data_collator,
155
  #processing_class=tokenizer,
156
  )
157