Kevin Fink committed on
Commit
01c6646
1 Parent(s): 782c88b
Files changed (1)
  1. app.py +25 -23
app.py CHANGED
@@ -114,8 +114,30 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
     # Load the dataset
     max_length = model.get_input_embeddings().weight.shape[0]
     try:
-        saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
-        if os.path.isfile(f'/data/{hub_id.strip()}_validation_dataset'):
+        saved_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset')
+        if os.path.isfile(f'/data/{hub_id.strip()}_test_dataset'):
+            train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset3')
+            saved_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
+            print("FOUND TEST")
+            # Create Trainer
+            trainer = Trainer(
+                model=model,
+                args=training_args,
+                train_dataset=train_dataset,
+                eval_dataset=saved_test_dataset,
+                compute_metrics=compute_metrics,
+            )
+
+        elif os.path.isfile(f'/data/{hub_id.strip()}_train_dataset3'):
+            if len(dataset['train']) == len(train_dataset['train']):
+                dataset = load_dataset(dataset_name.strip())
+                del dataset['train']
+                del dataset['validation']
+                test_set = dataset.map(tokenize_function, batched=True)
+                test_set['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
+                return 'TRAINING DONE'
+
+        elif os.path.isfile(f'/data/{hub_id.strip()}_validation_dataset'):
             dataset = load_dataset(dataset_name.strip())
             train_size = len(dataset['train'])
             third_size = train_size // 3
@@ -133,27 +155,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
             dataset['train'].save_to_disk(f'/data/{hub_id.strip()}_train_dataset3')
             return 'THIRD THIRD LOADED'
 
-        if not os.path.isfile(f'/data/{hub_id.strip()}_train_dataset3'):
-            train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset3')
-            if len(dataset['train']) == len(train_dataset['train']):
-                dataset = load_dataset(dataset_name.strip())
-                del dataset['train']
-                del dataset['validation']
-                test_set = dataset.map(tokenize_function, batched=True)
-                test_set['test'].save_to_disk(f'/data/{hub_id.strip()}_test_dataset')
-                return 'TRAINING DONE'
-            else:
-                train_dataset = load_from_disk(f'/data/{hub_id.strip()}_train_dataset3')
-                saved_test_dataset = load_from_disk(f'/data/{hub_id.strip()}_test_dataset')
-                print("FOUND TEST")
-                # Create Trainer
-                trainer = Trainer(
-                    model=model,
-                    args=training_args,
-                    train_dataset=train_dataset,
-                    eval_dataset=saved_test_dataset,
-                    compute_metrics=compute_metrics,
-                )
+
         if os.path.isfile(f'/data/{hub_id.strip()}_train_dataset' and not os.access(f'/data/{hub_id.strip()}_train_dataset3')):
             dataset = load_dataset(dataset_name.strip())
             train_size = len(dataset['train'])
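
Note on the unchanged context line `if os.path.isfile(f'/data/{hub_id.strip()}_train_dataset' and not os.access(f'/data/{hub_id.strip()}_train_dataset3')):` in both hunks: the `and` expression is evaluated first and its single result is passed to `os.path.isfile()`, and `os.access()` is called without its required `mode` argument, so the line raises `TypeError` when reached (which the surrounding `try` will swallow). Below is a minimal sketch of the two-part check that guard appears to intend, assuming the goal is "the first saved chunk exists on disk but the third does not". The helper name `chunk_pending` is illustrative only, not part of app.py; and since `save_to_disk()` writes a directory, `os.path.isdir` (or `os.path.exists`) is likely the right test rather than `os.path.isfile` for these paths.

import os

def chunk_pending(hub_id: str) -> bool:
    # Hypothetical helper, not from app.py: True when the first training
    # chunk has been saved but the third chunk has not yet been written.
    # save_to_disk() produces directories, hence os.path.isdir here.
    base = f'/data/{hub_id.strip()}_train_dataset'
    return os.path.isdir(base) and not os.path.isdir(f'{base}3')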