Kevin Fink committed on
Commit
fafbcd2
·
1 Parent(s): a5454ef
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -112,14 +112,11 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
112
 
113
  #max_length = 512
114
  # Load the dataset
115
- train_size = len(dataset['train'])
116
- third_size = train_size // 3
117
  max_length = model.get_input_embeddings().weight.shape[0]
118
  try:
119
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
120
  try:
121
- load_from_disk(f'/data/{hub_id.strip()}_validation_dataset')
122
- dataset = load_dataset(dataset_name.strip())
123
  try:
124
  saved_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
125
  print("FOUND TEST")
@@ -134,6 +131,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
134
  )
135
  except:
136
  dataset = load_dataset(dataset_name.strip())
 
 
137
  print("FOUND VALIDATION")
138
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
139
  third_third = dataset['train'].select(range(third_size*2, train_size))
@@ -145,6 +144,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
145
  return 'THIRD THIRD LOADED'
146
  except:
147
  dataset = load_dataset(dataset_name.strip())
 
 
148
  second_third = dataset['train'].select(range(third_size, third_size*2))
149
  dataset['train'] = second_third
150
  del dataset['test']
@@ -156,6 +157,8 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
156
 
157
  except:
158
  dataset = load_dataset(dataset_name.strip())
 
 
159
  # Tokenize the dataset
160
  first_third = dataset['train'].select(range(third_size))
161
  dataset['train'] = first_third
 
112
 
113
  #max_length = 512
114
  # Load the dataset
 
 
115
  max_length = model.get_input_embeddings().weight.shape[0]
116
  try:
117
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
118
  try:
119
+ load_from_disk(f'/data/{hub_id.strip()}_validation_dataset')
 
120
  try:
121
  saved_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
122
  print("FOUND TEST")
 
131
  )
132
  except:
133
  dataset = load_dataset(dataset_name.strip())
134
+ train_size = len(dataset['train'])
135
+ third_size = train_size // 3
136
  print("FOUND VALIDATION")
137
  saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset2')
138
  third_third = dataset['train'].select(range(third_size*2, train_size))
 
144
  return 'THIRD THIRD LOADED'
145
  except:
146
  dataset = load_dataset(dataset_name.strip())
147
+ train_size = len(dataset['train'])
148
+ third_size = train_size // 3
149
  second_third = dataset['train'].select(range(third_size, third_size*2))
150
  dataset['train'] = second_third
151
  del dataset['test']
 
157
 
158
  except:
159
  dataset = load_dataset(dataset_name.strip())
160
+ train_size = len(dataset['train'])
161
+ third_size = train_size // 3
162
  # Tokenize the dataset
163
  first_third = dataset['train'].select(range(third_size))
164
  dataset['train'] = first_third