Kevin Fink
committed
Commit · 22d6887
1 Parent(s): 6cbdc2a
deve
app.py CHANGED
@@ -115,15 +115,16 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         )
 
         # Setup the decoder input IDs (shifted right)
-
-
-
-
-
-
-
-
-
+        with tokenizer.as_target_tokenizer():
+            labels = tokenizer(
+                examples['target'],
+                max_length=max_length,  # Set to None for dynamic padding
+                truncation=True,
+                padding='max_length',
+                #text_target=examples['target'],
+                #return_tensors='pt',
+                #padding=True,
+            )
         #labels["input_ids"] = [
         #    [(l if l != tokenizer.pad_token_id else -100) for l in label] for label in labels["input_ids"]
         #]
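The block added above tokenizes the target column inside tokenizer.as_target_tokenizer() so that its input_ids can be used as labels for the seq2seq model. For context, a minimal sketch of a complete preprocessing function along these lines follows; the column names 'text' and 'target', the checkpoint name, and max_length=128 are illustrative assumptions, not values confirmed by this diff.

from transformers import AutoTokenizer

checkpoint = "google/flan-t5-small"   # hypothetical checkpoint, for illustration only
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
max_length = 128                      # assumed; set to None for dynamic padding

def tokenize_function(examples):
    # Tokenize the encoder inputs (the 'text' column name is an assumption).
    model_inputs = tokenizer(
        examples['text'],
        max_length=max_length,
        truncation=True,
        padding='max_length',
    )
    # Tokenize the targets as labels, mirroring the change in this commit.
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(
            examples['target'],
            max_length=max_length,
            truncation=True,
            padding='max_length',
        )
    model_inputs['labels'] = labels['input_ids']
    return model_inputs

Recent transformers releases also accept text_target=examples['target'] directly in the tokenizer call, which is what the commented-out text_target line hints at; with the same settings, both routes produce the same label ids.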
@@ -150,7 +151,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         train_dataset=train_dataset,
         eval_dataset=saved_test_dataset,
         #compute_metrics=compute_metrics,
-
+        data_collator=data_collator,
         #processing_class=tokenizer,
     )
 
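The second hunk wires a data_collator into the trainer. A minimal sketch of how such a collator is typically built and what it does is shown below; the checkpoint name and the example strings are assumptions for illustration, and the actual app may construct it differently.

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, DataCollatorForSeq2Seq

checkpoint = "google/flan-t5-small"   # hypothetical checkpoint, for illustration only
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# DataCollatorForSeq2Seq pads input_ids and labels per batch; any padding it
# adds to the labels uses label_pad_token_id=-100 so those positions are
# ignored by the loss, and model= lets it build decoder_input_ids from labels.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

features = [
    {'input_ids': tokenizer('translate: hello')['input_ids'],
     'labels': tokenizer('bonjour')['input_ids']},
    {'input_ids': tokenizer('translate: good morning everyone')['input_ids'],
     'labels': tokenizer('bonjour tout le monde')['input_ids']},
]
batch = data_collator(features)
print(batch['labels'])   # shorter label sequences are padded with -100

Passing data_collator=data_collator to the trainer, as this commit does, applies that batching logic during training. One caveat: the collator only masks the padding it adds itself, so labels already padded to max_length with pad_token_id (as in the hunk above) keep those pad ids unless they are mapped to -100 separately, which appears to be what the commented-out masking block is for.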