Kevin Fink committed on
Commit
5b51e47
·
1 Parent(s): 334f1e5
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -244,6 +244,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
244
  try:
245
  train_result = trainer.train(resume_from_checkpoint=True)
246
  except:
 
247
  checkpoint_dir = training_args.output_dir
248
  # If the trainer_state.json is missing, look for the previous checkpoint
249
  previous_checkpoints = sorted(os.listdir("/data/results"), key=get_checkpoint_int, reverse=True)
@@ -251,7 +252,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
251
  for check in previous_checkpoints:
252
  try:
253
  print(f"Removing previous checkpoint {check}")
254
- os.remove(os.path.join('/data/results', check))
255
  train_result = trainer.train(resume_from_checkpoint=True)
256
  trainer.push_to_hub(commit_message="Training complete!")
257
  return 'DONE!'#train_result
 
244
  try:
245
  train_result = trainer.train(resume_from_checkpoint=True)
246
  except:
247
+ import shutil
248
  checkpoint_dir = training_args.output_dir
249
  # If the trainer_state.json is missing, look for the previous checkpoint
250
  previous_checkpoints = sorted(os.listdir("/data/results"), key=get_checkpoint_int, reverse=True)
 
252
  for check in previous_checkpoints:
253
  try:
254
  print(f"Removing previous checkpoint {check}")
255
+ shutil.rmtree(os.path.join('/data/results', check))
256
  train_result = trainer.train(resume_from_checkpoint=True)
257
  trainer.push_to_hub(commit_message="Training complete!")
258
  return 'DONE!'#train_result