Commit eeb9dce
Mehdi Cherti committed
Parent(s): a4a6a13

add scripts

Files changed:
- scripts/eval_all.sh +34 -0
- scripts/fid.sh +0 -0
- scripts/init.sh +14 -0
- scripts/run_hdfml.sh +25 -0
- scripts/run_jurecadc_conda.sh +23 -0
- scripts/run_jurecadc_ddp.sh +21 -0
- scripts/run_jusuf_ddp.sh +14 -0
- scripts/run_juwelsbooster_conda.sh +19 -0
- scripts/run_juwelsbooster_ddp.sh +17 -0
scripts/eval_all.sh
ADDED
@@ -0,0 +1,34 @@
#!/bin/bash
#for model in ddgan_sd_v10 ddgan_laion2b_v2 ddgan_ddb_v1 ddgan_ddb_v2 ddgan_ddb_v3 ddgan_ddb_v4;do
#for model in ddgan_ddb_v2 ddgan_ddb_v3 ddgan_ddb_v4 ddgan_ddb_v5;do
#for model in ddgan_ddb_v4 ddgan_ddb_v6 ddgan_ddb_v7 ddgan_laion_aesthetic_v15;do
#for model in ddgan_ddb_v6;do
#for model in ddgan_laion_aesthetic_v15;do
#for model in ddgan_ddb_v3 ddgan_ddb_v11 ddgan_laion_aesthetic_v15;do
#for model in ddgan_ddb_v3 ddgan_ddb_v11 ddgan_ddb_v2;do
#for model in ddgan_ddb_v6 ddgan_ddb_v4 ddgan_ddb_v10 ddgan_ddb_v9;do
#for model in ddgan_ddb_v3 ddgan_ddb_v11;do
for model in ddgan_ddb_v11;do
#for model in ddgan_ddb_v3;do
    if [ "$model" == "ddgan_ddb_v3" ]; then
        bs=32
    elif [ "$model" == "ddgan_laion_aesthetic_v15" ]; then
        bs=32
    elif [ "$model" == "ddgan_ddb_v6" ]; then
        bs=32
    elif [ "$model" == "ddgan_ddb_v4" ]; then
        bs=16
    elif [ "$model" == "ddgan_ddb_v9" ]; then
        bs=16
    elif [ "$model" == "ddgan_ddb_v10" ]; then
        bs=16
    elif [ "$model" == "ddgan_ddb_v11" ]; then
        bs=16
    else
        bs=64
    fi
    sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh test_ddgan.py --name $model --cond-text=parti_prompts.txt --batch-size=$bs --epoch=-1 --compute-image-reward --eval-name=parti_image_reward
    #sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh test_ddgan.py --name $model --cond-text=parti_prompts.txt --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=parti;
    #sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh test_ddgan.py --name $model --fid --real-img-dir inception_statistics_coco_val2014_256x256.npz --cond-text coco_val2014_captions.txt --batch-size=$bs --epoch=-1 --nb-images-for-fid=30000 --eval-name=coco --compute-clip-score;
    #sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh test_ddgan.py --name $model --cond-text=drawbench.txt --batch-size=$bs --epoch=-1 --compute-clip-score --eval-name=drawbench;
done
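For reference, one iteration of the loop above expands to the stand-alone command below (ddgan_ddb_v11, so bs=16). This is a sketch for re-submitting a single evaluation by hand, assuming the same working-directory layout (test_ddgan.py and scripts/run_jurecadc_ddp.sh as used in the loop):

# Sketch: hand-submitted equivalent of one loop iteration above
sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_ddp.sh \
    test_ddgan.py --name ddgan_ddb_v11 --cond-text=parti_prompts.txt \
    --batch-size=16 --epoch=-1 --compute-image-reward --eval-name=parti_image_reward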
scripts/fid.sh
ADDED
File without changes
scripts/init.sh
ADDED
@@ -0,0 +1,14 @@
ml purge
ml use $OTHERSTAGES
ml Stages/2022
ml GCC/11.2.0
ml OpenMPI/4.1.2
ml CUDA/11.5
ml cuDNN/8.3.1.22-CUDA-11.5
ml NCCL/2.12.7-1-CUDA-11.5
ml PyTorch/1.11-CUDA-11.5
ml Horovod/0.24
ml torchvision/0.12.0
source /p/home/jusers/cherti1/jureca/ccstdl/code/feed_forward_vqgan_clip/envs/jureca_2022/bin/activate
export HOROVOD_CACHE_CAPACITY=4096
export CUDA_VISIBLE_DEVICES=0,1,2,3
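scripts/init.sh is meant to be sourced, not executed: the DDP launchers below pull it in with "source scripts/init.sh" to load the 2022 software stage and the Python virtualenv. A minimal sketch of reusing it interactively when debugging on a compute node (the torch check line is only an illustrative sanity check, not part of this commit):

# Sketch: reproduce the batch-job environment in an interactive shell
source scripts/init.sh
python -c "import torch; print(torch.__version__, torch.cuda.is_available())"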
scripts/run_hdfml.sh
ADDED
@@ -0,0 +1,25 @@
#!/bin/bash -x
#SBATCH --account=cstdl
#SBATCH --nodes=8
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=8
#SBATCH --time=06:00:00
#SBATCH --gres=gpu
#SBATCH --partition=batch
ml purge
ml use $OTHERSTAGES
ml Stages/2022
ml GCC/11.2.0
ml OpenMPI/4.1.2
ml CUDA/11.5
ml cuDNN/8.3.1.22-CUDA-11.5
ml NCCL/2.12.7-1-CUDA-11.5
ml PyTorch/1.11-CUDA-11.5
ml Horovod/0.24
ml torchvision/0.12.0
source envs/hdfml/bin/activate
export CUDA_VISIBLE_DEVICES=0,1,2,3
echo "Job id: $SLURM_JOB_ID"
export TOKENIZERS_PARALLELISM=false
export NCCL_ASYNC_ERROR_HANDLING=1
srun python -u $*
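The #SBATCH lines above are defaults; flags passed to sbatch on the command line take precedence over them. A sketch of a shorter, single-node submission on HDFML, assuming test_ddgan.py accepts the same flags it is given in scripts/eval_all.sh:

# Sketch: override the 8-node/6-hour defaults for a quick single-node evaluation
sbatch --nodes=1 --time=01:00:00 scripts/run_hdfml.sh \
    test_ddgan.py --name ddgan_ddb_v11 --cond-text=parti_prompts.txt \
    --batch-size=16 --epoch=-1 --compute-clip-score --eval-name=parti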
scripts/run_jurecadc_conda.sh
ADDED
@@ -0,0 +1,23 @@
#!/bin/bash -x
#SBATCH --account=zam
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=24
#SBATCH --time=06:00:00
#SBATCH --gres=gpu:4
#SBATCH --partition=dc-gpu
ml CUDA
source /p/project/laionize/miniconda/bin/activate
conda activate ddgan
#source scripts/init_2022.sh
#source scripts/init_2020.sh
#source scripts/init.sh
export CUDA_VISIBLE_DEVICES=0,1,2,3
echo "Job id: $SLURM_JOB_ID"
export TOKENIZERS_PARALLELISM=false
#export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export TORCH_DISTRIBUTED_DEBUG=INFO
srun python -u $*
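This launcher differs from scripts/run_jurecadc_ddp.sh only in how the environment is prepared (a conda env named ddgan instead of the module/virtualenv stack from scripts/init.sh), so it can serve as a drop-in replacement in the eval loop. A sketch, reusing the drawbench arguments from the commented-out line in scripts/eval_all.sh:

# Sketch: same kind of evaluation job, routed through the conda launcher
sbatch --partition dc-gpu -t 360 -N 1 -n1 scripts/run_jurecadc_conda.sh \
    test_ddgan.py --name ddgan_ddb_v11 --cond-text=drawbench.txt \
    --batch-size=16 --epoch=-1 --compute-clip-score --eval-name=drawbench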
scripts/run_jurecadc_ddp.sh
ADDED
@@ -0,0 +1,21 @@
#!/bin/bash -x
#SBATCH --account=zam
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=24
#SBATCH --time=06:00:00
#SBATCH --gres=gpu:4
#SBATCH --partition=dc-gpu
source set_torch_distributed_vars.sh
#source scripts/init_2022.sh
#source scripts/init_2020.sh
source scripts/init.sh
export CUDA_VISIBLE_DEVICES=0,1,2,3
echo "Job id: $SLURM_JOB_ID"
export TOKENIZERS_PARALLELISM=false
#export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export TRANSFORMERS_CACHE=cache
srun python -u $*
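The final line forwards everything passed to sbatch straight to Python, which is why scripts/eval_all.sh can hand this launcher test_ddgan.py plus its flags. Note that $* re-splits arguments on whitespace; a quoting-safe variant (an optional tweak, not part of this commit) would be:

# Sketch: preserve argument boundaries if any forwarded argument contains spaces
srun python -u "$@"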
scripts/run_jusuf_ddp.sh
ADDED
@@ -0,0 +1,14 @@
#!/bin/bash -x
#SBATCH --account=zam
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=24
#SBATCH --time=06:00:00
#SBATCH --gres=gpu:1
#SBATCH --partition=gpus
source set_torch_distributed_vars.sh
source scripts/init.sh
export CUDA_VISIBLE_DEVICES=0,1,2,3
echo "Job id: $SLURM_JOB_ID"
export TOKENIZERS_PARALLELISM=false
srun python -u $*
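This JUSUF launcher allocates a single task with one GPU, so it fits quick single-GPU checks; an alternative sketch is to skip sbatch entirely and work interactively (the time limit and the test_ddgan.py flags below are illustrative, taken from scripts/eval_all.sh):

# Sketch: interactive single-GPU session on JUSUF instead of a batch job
srun --account=zam --partition=gpus --gres=gpu:1 --cpus-per-task=24 --time=01:00:00 --pty bash
# then, inside the interactive shell:
source scripts/init.sh
python -u test_ddgan.py --name ddgan_ddb_v11 --cond-text=parti_prompts.txt \
    --batch-size=16 --epoch=-1 --compute-clip-score --eval-name=parti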
scripts/run_juwelsbooster_conda.sh
ADDED
@@ -0,0 +1,19 @@
#!/bin/bash -x
#SBATCH --account=laionize
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=24
#SBATCH --time=06:00:00
#SBATCH --gres=gpu:4
#SBATCH --partition=booster
ml CUDA
source /p/project/laionize/miniconda/bin/activate
conda activate ddgan
export CUDA_VISIBLE_DEVICES=0,1,2,3
echo "Job id: $SLURM_JOB_ID"
export TOKENIZERS_PARALLELISM=false
#export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
srun python -u $*
scripts/run_juwelsbooster_ddp.sh
ADDED
@@ -0,0 +1,17 @@
#!/bin/bash -x
#SBATCH --account=covidnetx
#SBATCH --nodes=4
#SBATCH --ntasks-per-node=4
#SBATCH --cpus-per-task=24
#SBATCH --time=06:00:00
#SBATCH --gres=gpu:4
#SBATCH --partition=booster
source set_torch_distributed_vars.sh
#source scripts/init_2022.sh
#source scripts/init_2020.sh
source scripts/init.sh
export CUDA_VISIBLE_DEVICES=0,1,2,3
echo "Job id: $SLURM_JOB_ID"
export TOKENIZERS_PARALLELISM=false
export NCCL_ASYNC_ERROR_HANDLING=1
srun python -u $*
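This is the multi-node variant: 4 nodes x 4 GPUs gives 16 ranks per job. A sketch of a submission follows; train_ddgan.py and its flags are hypothetical placeholders, since this commit only shows test_ddgan.py being launched:

# Sketch: 16-rank DDP job on JUWELS Booster (train_ddgan.py is a hypothetical entry point)
sbatch scripts/run_juwelsbooster_ddp.sh train_ddgan.py --name ddgan_ddb_v11 --batch-size=16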