From 353056d5b1fb93564982210e4f5ae3f15b7891b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Lilleeng=20S=C3=A6tra?=
Date: Thu, 7 Apr 2022 13:52:16 +0200
Subject: [PATCH] Added arguments for job script

---
 saga-dev.job                      |  6 +++---
 saga_strong_scaling_benchmark.job | 16 ++++++++--------
 saga_strong_scaling_benchmark.sh  | 18 +++++++++---------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/saga-dev.job b/saga-dev.job
index a0023ff..7aa6786 100644
--- a/saga-dev.job
+++ b/saga-dev.job
@@ -6,7 +6,7 @@
 #SBATCH --account=nn9882k
 #
 # Wall clock limit:
-#SBATCH --time=00:20:00
+#SBATCH --time=00:02:00
 #
 # NOTE: See https://documentation.sigma2.no/jobs/projects_accounting.html when adjusting the values below
 #
@@ -28,7 +28,7 @@
 # source /cluster/bin/jobsetup
 
 module restore system # instead of 'module purge' rather set module environment to the system default
-module load CUDA/10.2.89
+module load CUDA/11.4.1
 # It is also recommended to to list loaded modules, for easier debugging:
 module list
 
@@ -41,7 +41,7 @@ mkdir $SCRATCH/ShallowWaterGPU
 cp -r . $SCRATCH/ShallowWaterGPU
 
 ## Make sure the results are copied back to the submit directory (see Work Directory below):
-# chkfile MyResultFile
+# chkfile MyResultFileq
 # chkfile is replaced by 'savefile' on Saga
 savefile "$SCRATCH/ShallowWaterGPU/*.log"
 savefile "$SCRATCH/ShallowWaterGPU/*.nc"
diff --git a/saga_strong_scaling_benchmark.job b/saga_strong_scaling_benchmark.job
index 0d729c3..fc61ffb 100644
--- a/saga_strong_scaling_benchmark.job
+++ b/saga_strong_scaling_benchmark.job
@@ -6,26 +6,26 @@
 #SBATCH --account=nn9882k
 #
 # Wall clock limit:
-#SBATCH --time=10:00:00
+#SBATCH --time=00:10:00
+#
+# NOTE: See https://documentation.sigma2.no/jobs/projects_accounting.html when adjusting the values below
 #
-# Ask for 1 GPU (max is 2)
 # Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
 # device(s) to use. It will have values '0', '1' or '0,1' corresponding to
 # /dev/nvidia0, /dev/nvidia1 or both, respectively.
-#SBATCH --partition=accel --gres=gpu:1
+#SBATCH --partition=accel
 #
 # Max memory usage per task (core) - increasing this will cost more core hours:
-#SBATCH --mem-per-cpu=16G
+#SBATCH --mem-per-cpu=3800M
 #
-# Number of tasks:
-#SBATCH --nodes=1 --ntasks-per-node=1
+#SBATCH --qos=devel
 
 ## Set up job environment: (this is done automatically behind the scenes)
 ## (make sure to comment '#' or remove the following line 'source ...')
 # source /cluster/bin/jobsetup
 
 module restore system # instead of 'module purge' rather set module environment to the system default
-module load CUDA/10.2.89
+module load CUDA/11.4.1
 # It is also recommended to to list loaded modules, for easier debugging:
 module list
 
@@ -47,5 +47,5 @@ savefile "$SCRATCH/ShallowWaterGPU/*.json"
 ## Do some work:
 cd $SCRATCH/ShallowWaterGPU
 srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
-srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx 1024 -ny 1024 --profile
+srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
 
diff --git a/saga_strong_scaling_benchmark.sh b/saga_strong_scaling_benchmark.sh
index 1d2284d..96ba541 100644
--- a/saga_strong_scaling_benchmark.sh
+++ b/saga_strong_scaling_benchmark.sh
@@ -1,13 +1,13 @@
 #!/bin/bash
 
-# one node: 1-4 tasks/GPUs
-sbatch --partition=accel --gres=gpu:1 --nodes=1 --ntasks-per-node=1 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:2 --nodes=1 --ntasks-per-node=2 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:3 --nodes=1 --ntasks-per-node=3 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:4 --nodes=1 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
+# one node: 1-4 GPUs
+sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=1024,NY=1024 saga_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=1024,NY=512 saga_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=1024,NY=341 saga_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=512,NY=512 saga_strong_scaling_benchmark.job
 
-# 2-4 nodes: 4 tasks/GPUs per node
-sbatch --partition=accel --gres=gpu:4 --nodes=2 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:4 --nodes=3 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:4 --nodes=4 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
+# 2-4 nodes: 1 GPU per node
+sbatch --nodes=2 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=1024,NY=512 saga_strong_scaling_benchmark.job
+sbatch --nodes=3 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=1024,NY=341 saga_strong_scaling_benchmark.job
+sbatch --nodes=4 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=512,NY=512 saga_strong_scaling_benchmark.job
 
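
The submit lines above rely on Slurm's --export flag to pass the domain size into the batch job: variables listed after --export=ALL are added to the job's environment, so $NX and $NY are defined when saga_strong_scaling_benchmark.job launches mpiTesting.py. A minimal sketch of submitting a single run this way (the 2048x2048 size is only an illustrative value, not taken from the patch):

    # Submit one job and hand the domain size to the job script via the environment.
    # Inside the job script these become $NX and $NY on the mpiTesting.py command line.
    sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 \
           --export=ALL,NX=2048,NY=2048 \
           saga_strong_scaling_benchmark.job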