Kevin Fink
committed on
Commit
·
5b51e47
1
Parent(s):
334f1e5
dev
Browse files
app.py
CHANGED
@@ -244,6 +244,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
244 |
try:
|
245 |
train_result = trainer.train(resume_from_checkpoint=True)
|
246 |
except:
|
|
|
247 |
checkpoint_dir = training_args.output_dir
|
248 |
# If the trainer_state.json is missing, look for the previous checkpoint
|
249 |
previous_checkpoints = sorted(os.listdir("/data/results"), key=get_checkpoint_int, reverse=True)
|
@@ -251,7 +252,7 @@ def fine_tune_model(model, dataset_name, hub_id, api_key, num_epochs, batch_size
|
|
251 |
for check in previous_checkpoints:
|
252 |
try:
|
253 |
print(f"Removing previous checkpoint {check}")
|
254 |
-
|
255 |
train_result = trainer.train(resume_from_checkpoint=True)
|
256 |
trainer.push_to_hub(commit_message="Training complete!")
|
257 |
return 'DONE!'#train_result
|
|
|
244 |
try:
|
245 |
train_result = trainer.train(resume_from_checkpoint=True)
|
246 |
except:
|
247 |
+
import shutil
|
248 |
checkpoint_dir = training_args.output_dir
|
249 |
# If the trainer_state.json is missing, look for the previous checkpoint
|
250 |
previous_checkpoints = sorted(os.listdir("/data/results"), key=get_checkpoint_int, reverse=True)
|
|
|
252 |
for check in previous_checkpoints:
|
253 |
try:
|
254 |
print(f"Removing previous checkpoint {check}")
|
255 |
+
shutil.rmtree(os.path.join('/data/results', check))
|
256 |
train_result = trainer.train(resume_from_checkpoint=True)
|
257 |
trainer.push_to_hub(commit_message="Training complete!")
|
258 |
return 'DONE!'#train_result
|