Revised DGX-2 benchmarks and added HGX benchmarks.

This commit is contained in:
Martin Lilleeng Sætra 2022-06-20 09:28:45 +02:00
parent 0f0cbad2dd
commit aa693a9a46
6 changed files with 428 additions and 263 deletions

File diff suppressed because one or more lines are too long

View File

@ -3,20 +3,36 @@
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S") TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-16 GPUs # one node: 1-16 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=4096,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=4096,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=2731,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=2731,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=2048,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=2048,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=1638,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=1638,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=1365,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=1365,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=1170,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=1170,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=1024,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=1024,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#
#sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=8192,NY=910,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=8192,NY=819,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=8192,NY=745,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=8192,NY=683,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=8192,NY=630,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=8192,NY=585,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=8192,NY=546,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=8192,NY=512,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=8192,NY=910,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job # one node: 4-16 GPUs
sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=8192,NY=819,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=41984,NY=10496,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=8192,NY=745,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=41984,NY=8396,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=8192,NY=683,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=41984,NY=6997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=8192,NY=630,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=41984,NY=5997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=8192,NY=585,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=41984,NY=5248,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=8192,NY=546,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=8192,NY=512,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=41984,NY=4664,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=41984,NY=4198,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=41984,NY=3816,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=41984,NY=3498,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=41984,NY=3229,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=41984,NY=2998,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=41984,NY=2798,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=41984,NY=2624,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job

View File

@ -3,20 +3,39 @@
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S") TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-16 GPUs # one node: 1-16 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job #sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#
#sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job # one node: 1-16 GPUs
sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=41984,NY=41984,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job

58
hgx_scaling_benchmark.job Normal file
View File

@ -0,0 +1,58 @@
#!/bin/bash
# See http://wiki.ex3.simula.no before changing the values below
#SBATCH -p hgx2q # partition (GPU queue)
#SBATCH -w g002 # HGX node
#SBATCH -t 0-00:10 # time (D-HH:MM)
#SBATCH -o slurm.%N.%j.out # STDOUT
#SBATCH -e slurm.%N.%j.err # STDERR
#SBATCH --reservation=martinls_11
# For Linux 64, Open MPI is built with CUDA awareness but this support is disabled by default.
# To enable it, please set the environment variable OMPI_MCA_opal_cuda_support=true before
# launching your MPI processes. Equivalently, you can set the MCA parameter in the command line:
# mpiexec --mca opal_cuda_support 1 ...
#
# In addition, the UCX support is also built but disabled by default.
# To enable it, first install UCX (conda install -c conda-forge ucx). Then, set the environment
# variables OMPI_MCA_pml="ucx" OMPI_MCA_osc="ucx" before launching your MPI processes.
# Equivalently, you can set the MCA parameters in the command line:
# mpiexec --mca pml ucx --mca osc ucx ...
# Note that you might also need to set UCX_MEMTYPE_CACHE=n for CUDA awareness via UCX.
# Please consult UCX's documentation for detail.
ulimit -s 10240
module load slurm/20.02.7
module load cuda11.2/toolkit/11.2.2
module load openmpi4-cuda11.2-ofed50-gcc8/4.1.0
# Check how many gpu's your job got
#nvidia-smi
mkdir -p output_hgx/$NOW
## Copy input files to the work directory:
mkdir -p /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
cp -r . /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
# Run job
# (Assumes Miniconda is installed in user root dir.)
cd /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
#mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
#nsys profile -t nvtx,cuda mpirun -np $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
#mpirun -np $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
export OMPI_MCA_opal_cuda_support=true
mpirun -np $SLURM_NTASKS $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
cd $HOME/src/ShallowWaterGPU
## Copy files from work directory:
# (NOTE: Copying is not performed if job fails!)
mkdir -p output_hgx/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.log ./output_hgx/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.nc ./output_hgx/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.json ./output_hgx/$NOW
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.qdrep ./output_hgx/$NOW
rm -rf /work/$USER/$SLURM_JOB_ID

View File

@ -0,0 +1,20 @@
#!/bin/bash
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-8 GPUs
#sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=4096,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=2731,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=2048,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=1638,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=1365,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=1170,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=1024,NOW=$TIMESTAMP hgx_scaling_benchmark.job
# one node: 4-8 GPUs
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=41984,NY=10496,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=41984,NY=8396,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=41984,NY=6997,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=41984,NY=5997,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=41984,NY=5248,NOW=$TIMESTAMP hgx_scaling_benchmark.job

View File

@ -0,0 +1,23 @@
#!/bin/bash
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-16 GPUs
#sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
# one node: 1-8 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job