Kevin Fink
committed on
Commit
·
8bef298
1
Parent(s):
9a0b862
init
Browse files
app.py
CHANGED
@@ -40,7 +40,16 @@ def fine_tune_model(model_name, dataset_name, hub_id, api_key, num_epochs, batch
|
|
40 |
# Tokenize the dataset
|
41 |
def tokenize_function(examples):
|
42 |
max_length = 32
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
46 |
|
|
|
40 |
# Tokenize the dataset
|
41 |
def tokenize_function(examples):
|
42 |
max_length = 32
|
43 |
+
# Tokenize the inputs. Assumes the dataset has a 'text' column (input) and a 'target' column (expected output); both are truncated to max_length tokens
|
44 |
+
model_inputs = tokenizer(examples['text'], max_length=max_length, truncation=True)
|
45 |
+
|
46 |
+
# Tokenize the targets in target-tokenizer mode; their input IDs become the labels (no shifting is performed here)
# NOTE(review): as_target_tokenizer() is deprecated in recent transformers releases in favour of tokenizer(text_target=...) - confirm against the installed version
|
47 |
+
with tokenizer.as_target_tokenizer():
|
48 |
+
labels = tokenizer(examples['target'], max_length=max_length, truncation=True)
|
49 |
+
|
50 |
+
# Attach the target token IDs to the model inputs under the "labels" key
|
51 |
+
model_inputs["labels"] = labels["input_ids"]
|
52 |
+
return model_inputs
|
53 |
|
54 |
tokenized_datasets = dataset.map(tokenize_function, batched=True)
|
55 |
|