"""Launch one Python worker process per CUDA device.

Every command-line argument given to this launcher is forwarded to each
worker, followed by ``--n_gpus=<count>``, ``--group_name=group_<timestamp>``
and ``--rank=<index>``.  The last command-line argument is additionally
used as the directory that receives the stdout of every worker except
rank 0, whose stdout is inherited from the launcher.
"""
import contextlib
import os
import subprocess
import sys
import time

import torch


def _worker_args(shared_args, rank):
    """Return the interpreter arguments for the worker with index *rank*."""
    return list(shared_args) + ['--rank={}'.format(rank)]


def main(argv=None, num_gpus=None):
    """Start the workers, wait for all of them and report their status.

    Args:
        argv: Arguments forwarded to every worker; the last one doubles as
            the log directory.  Defaults to ``sys.argv[1:]``.
        num_gpus: Number of workers to start.  Defaults to
            ``torch.cuda.device_count()``.

    Returns:
        ``0`` when every worker exited with status 0, ``2`` when no
        arguments were supplied, otherwise the first non-zero worker exit
        status (in rank order).
    """
    forwarded = list(sys.argv[1:] if argv is None else argv)
    if not forwarded:
        # The log directory is taken from the last argument, so an empty
        # argument list cannot be served; fail with a message rather than
        # an IndexError.
        print("usage: {} <args forwarded to python ...> <log_dir>".format(
            os.path.basename(sys.argv[0])), file=sys.stderr)
        return 2

    log_dir = forwarded[-1]
    if num_gpus is None:
        num_gpus = torch.cuda.device_count()
    job_id = time.strftime("%Y_%m_%d-%H%M%S")
    shared_args = forwarded + [
        '--n_gpus={}'.format(num_gpus),
        "--group_name=group_{}".format(job_id),
    ]

    print("GPU log directory is {}".format(log_dir))
    os.makedirs(log_dir, exist_ok=True)
    if num_gpus == 0:
        print("No CUDA devices found; no workers were started.",
              file=sys.stderr)

    workers = []
    # The ExitStack owns the per-rank log files so they are closed once all
    # workers have finished, or as soon as launching fails.
    with contextlib.ExitStack() as log_files:
        try:
            for rank in range(num_gpus):
                worker_args = _worker_args(shared_args, rank)
                stdout = None
                if rank != 0:
                    log_path = os.path.join(
                        log_dir, "{}_GPU_{}.log".format(job_id, rank))
                    stdout = log_files.enter_context(open(log_path, "w"))
                print(worker_args)
                workers.append(subprocess.Popen(
                    [str(sys.executable)] + worker_args, stdout=stdout))
        except BaseException:
            # Do not leave already-started workers running without their
            # peers when a later launch fails.
            for worker in workers:
                worker.terminate()
            raise
        exit_codes = [worker.wait() for worker in workers]

    for rank, code in enumerate(exit_codes):
        if code != 0:
            print("Worker with rank {} exited with status {}".format(
                rank, code), file=sys.stderr)
    return next((code for code in exit_codes if code != 0), 0)


if __name__ == "__main__":
    sys.exit(main())