Kevin Fink committed
Commit c1e5fc9 · 1 Parent(s): 7ff0a99
Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -117,7 +117,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     try:
         tokenized_first_half = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
         second_half = dataset['train'].select(range(half_size, train_size))
-        tokenized_second_half = tokenize_function(second_half)
+        tokenized_second_half = tokenize_function(second_half.to_dict())
         tokenized_train_dataset = concatenate_datasets([tokenized_first_half, tokenized_second_half])
         tokenized_test_dataset = tokenize_function(dataset['test'])
 
@@ -133,7 +133,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
     # Tokenize the dataset
     first_half = dataset['train'].select(range(half_size))
-    tokenized_half = tokenize_function(first_half)
+    tokenized_half = tokenize_function(first_half.to_dict())
 
     tokenized_half.save_to_disk(f'/data/{hub_id.strip()}_train_dataset')
 
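Both hunks make the same fix: Dataset.select() returns a datasets.Dataset object, and the commit converts that slice to a plain dict of column lists with .to_dict() before handing it to tokenize_function. A minimal sketch of the type conversion involved, assuming a hypothetical 'text' column (the real column names and the body of tokenize_function are not shown in this diff):

# Minimal sketch of the type change behind this commit. The column name
# 'text' and the example rows are assumptions for illustration only;
# the real columns live in the dataset that app.py loads.
from datasets import Dataset

ds = Dataset.from_dict({'text': ['a', 'b', 'c', 'd']})
half = ds.select(range(2))   # datasets.Dataset holding the first two rows
as_dict = half.to_dict()     # plain dict of column lists: {'text': ['a', 'b']}

print(type(half).__name__)   # Dataset
print(as_dict)               # {'text': ['a', 'b']}

With the plain dict, tokenize_function receives batched columns (e.g. a list of strings) rather than a Dataset object, which is presumably what its tokenizer call expects.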