Kevin Fink committed on
Commit 7ff0a99 · 1 Parent(s): 6757426
Files changed (1)
  1. app.py +4 -6
app.py CHANGED
@@ -90,7 +90,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         examples['text'],
         max_length=max_length, # Set to None for dynamic padding
         truncation=True,
-        padding='max_length',
+        padding='longest',
         return_tensors='pt',
     )
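Note: `padding='max_length'` pads every example out to the full `max_length`, while `padding='longest'` pads only to the longest sequence in the current batch, which avoids spending memory and compute on pad tokens for short inputs. A minimal sketch of the difference, assuming a standard Hugging Face tokenizer (the sample texts and the 128 cap are illustrative, not from app.py):

    from transformers import AutoTokenizer

    # Same fallback checkpoint app.py loads in its except branch.
    tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
    texts = ['short input', 'a noticeably longer example input sentence']

    # Pads both sequences all the way out to max_length.
    fixed = tokenizer(texts, max_length=128, truncation=True,
                      padding='max_length', return_tensors='pt')

    # Pads only to the longest sequence actually present in the batch.
    longest = tokenizer(texts, max_length=128, truncation=True,
                        padding='longest', return_tensors='pt')

    print(fixed['input_ids'].shape)    # torch.Size([2, 128])
    print(longest['input_ids'].shape)  # torch.Size([2, <batch max>])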
@@ -99,8 +99,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         examples['target'],
         max_length=max_length, # Set to None for dynamic padding
         truncation=True,
-        padding='max_length',
-        text_target=examples['target'],
+        padding='longest',
+        #text_target=examples['target'],
         return_tensors='pt',
     )
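Note: the commented-out `text_target` kwarg was redundant in this call, since `examples['target']` is already the positional text argument, so the same strings were being tokenized twice. For reference, the conventional single-call seq2seq pattern passes inputs positionally and targets via `text_target`, which fills the `labels` field (a sketch of standard transformers usage, not code from this commit):

    # Sketch: tokenize inputs and targets in one call.
    model_inputs = tokenizer(
        examples['text'],                # -> input_ids / attention_mask
        text_target=examples['target'],  # -> labels
        max_length=max_length,
        truncation=True,
        padding='longest',
        return_tensors='pt',
    )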
@@ -131,9 +131,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
         )
     except:
         tokenizer = AutoTokenizer.from_pretrained('google/t5-efficient-tiny-nh8')
-    # Tokenize the dataset
-
-
+    # Tokenize the dataset
     first_half = dataset['train'].select(range(half_size))
     tokenized_half = tokenize_function(first_half)
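Note: the recurring "# Set to None for dynamic padding" comment points at the other common setup, which this commit moves toward: skip fixed padding at tokenize time and let a collator pad each batch dynamically. A hedged sketch of that standard transformers pattern (not part of this commit):

    from transformers import DataCollatorForSeq2Seq

    # Pads each training batch to its own longest sequence at load time,
    # so tokenize_function could drop the padding argument entirely.
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
    # Then pass data_collator=data_collator to the Trainer.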
 
 