Kevin Fink
committed on
Commit
·
c1e5fc9
1
Parent(s):
7ff0a99
dev
Browse files
app.py
CHANGED
@@ -117,7 +117,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
117 |
try:
|
118 |
tokenized_first_half = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
119 |
second_half = dataset['train'].select(range(half_size, train_size))
|
120 |
-
tokenized_second_half = tokenize_function(second_half)
|
121 |
tokenized_train_dataset = concatenate_datasets([tokenized_first_half, tokenized_second_half])
|
122 |
tokenized_test_dataset = tokenize_function(dataset['test'])
|
123 |
|
@@ -133,7 +133,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
133 |
tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
|
134 |
# Tokenize the dataset
|
135 |
first_half = dataset['train'].select(range(half_size))
|
136 |
-
tokenized_half = tokenize_function(first_half)
|
137 |
|
138 |
tokenized_half.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
139 |
|
|
|
117 |
try:
|
118 |
tokenized_first_half = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
|
119 |
second_half = dataset['train'].select(range(half_size, train_size))
|
120 |
+
tokenized_second_half = tokenize_function(second_half.to_dict())
|
121 |
tokenized_train_dataset = concatenate_datasets([tokenized_first_half, tokenized_second_half])
|
122 |
tokenized_test_dataset = tokenize_function(dataset['test'])
|
123 |
|
|
|
133 |
tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
|
134 |
# Tokenize the dataset
|
135 |
first_half = dataset['train'].select(range(half_size))
|
136 |
+
tokenized_half = tokenize_function(first_half.to_dict())
|
137 |
|
138 |
tokenized_half.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
|
139 |
|