Kevin Fink committed on
Commit
6249921
·
1 Parent(s): 88673c0
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -236,14 +236,18 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
236
  print(f'{entry}: {current_dir}')
237
  except:
238
  pass
 
 
 
 
 
239
  try:
240
  train_result = trainer.train(resume_from_checkpoint=True)
241
  except:
242
  checkpoint_dir = training_args.output_dir
243
  # If the trainer_state.json is missing, look for the previous checkpoint
244
- print(f"Checkpoint {checkpoint_dir} is missing 'trainer_state.json'. Looking for previous checkpoints...")
245
- previous_checkpoints = sorted(glob.glob(os.path.join(os.path.dirname(checkpoint_dir), 'checkpoint-*')), key=os.path.getmtime)
246
- print(previous_checkpoints)
247
  if previous_checkpoints:
248
  # Load the most recent previous checkpoint
249
  last_checkpoint = previous_checkpoints[-2]
 
236
  print(f'{entry}: {current_dir}')
237
  except:
238
  pass
239
+
240
+ def get_checkpoint_int(s):
241
+ int_index = s.find('-')
242
+ return int(s[int_index+1:])
243
+
244
  try:
245
  train_result = trainer.train(resume_from_checkpoint=True)
246
  except:
247
  checkpoint_dir = training_args.output_dir
248
  # If the trainer_state.json is missing, look for the previous checkpoint
249
+ previous_checkpoints = sorted(os.listdir("/data/results"), key=get_checkpoint_int)
250
+
 
251
  if previous_checkpoints:
252
  # Load the most recent previous checkpoint
253
  last_checkpoint = previous_checkpoints[-2]