File size: 613 Bytes
eeb9dce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
#!/bin/bash
#
# Slurm batch script: runs a Python program under `srun` on one node with
# 4 tasks and 4 GPUs.
#
# Usage:
#   sbatch <this-script> <python-script> [args...]
#
# Every argument given to this script is forwarded unchanged to `python -u`.
#
# NOTE: all #SBATCH directives must stay above the first executable command;
# sbatch stops parsing directives once it reaches one.
#SBATCH --account=zam
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=24
#SBATCH --time=06:00:00
#SBATCH --gres=gpu:4
#SBATCH --partition=dc-gpu

# Trace every command into the job log. Enabled here rather than in the
# shebang so it also applies when the script is started as `bash <script>`.
set -x

# Software environment. `ml` is presumably the Lmod shorthand for
# `module load` -- confirm against the cluster's module system.
ml CUDA
source /p/project/laionize/miniconda/bin/activate
conda activate ddgan
# Alternative environment setups, currently disabled:
#source scripts/init_2022.sh
#source scripts/init_2020.sh
#source scripts/init.sh

# Expose four GPUs (indices 0-3) to the launched processes.
export CUDA_VISIBLE_DEVICES=0,1,2,3
echo "Job id: $SLURM_JOB_ID"

# Runtime tuning read by the launched program's libraries (tokenizers, NCCL,
# UCX, torch.distributed). The values below are kept exactly as they were;
# see the respective library documentation for their precise semantics.
export TOKENIZERS_PARALLELISM=false
#export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export TORCH_DISTRIBUTED_DEBUG=INFO

# "$@" keeps each argument as its own word. The previous unquoted $* re-split
# arguments on whitespace and glob-expanded them, which corrupted paths with
# spaces and quoted values such as --prompt "a red car".
srun python -u "$@"