Start of HIPIFYcation

Anthony Berg 2025-06-16 16:09:45 +02:00
parent e7cd6ae34a
commit 19ea8319e8
29 changed files with 11 additions and 650 deletions


@@ -1,29 +1,28 @@
# Assumes that conda, pip, build-essential and CUDA are installed
---
name: ShallowWaterGPU
name: FiniteVolumeGPU_HPC
channels:
- conda-forge
dependencies:
- python=3.9
- python=3.13.5
- pip
- numpy
- matplotlib
- jupyter
- mpi4py
- six
- pytools
- netcdf4
- scipy
- nb_conda_kernels
- nbdime
- mpi4py
- ffmpeg
- pycuda
- ipyparallel
- line_profiler
- tqdm
- pip:
- hip-python==6.4.1.552.39
- -i https://test.pypi.org/simple/
# Install conda environment (one-time operation):
# $ conda env create -f conda_environment.yml
# Activate environment
# $ conda activate ShallowWaterGPU
# $ conda activate FiniteVolumeGPU_HPC
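# For reference, a minimal sketch of the one-time setup as a single sequence, assuming
# conda is already on PATH (the 'pip show' check is an added suggestion, not from the repo):
# $ conda env create -f conda_environment.yml
# $ conda activate FiniteVolumeGPU_HPC
# $ pip show hip-python    (confirms the pinned hip-python package was installed)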
# OPTIONAL: If you want to compile pycuda yourself, uncomment pycuda under
# "dependencies" above and do the following (one-time operation):


@@ -1,23 +0,0 @@
# Assumes that conda, pip, build-essential and CUDA are installed
---
name: ShallowWaterGPU_HPC
channels:
- conda-forge
dependencies:
- python=3.7
- numpy
- mpi4py
- six
- pytools
- netcdf4
- scipy
- tqdm
# Install conda environment (one-time operation):
# $ conda env create -f conda_environment_hpc.yml
# Activate environment and install the following packages using pip:
# $ conda activate ShallowWaterGPU_HPC
# - pycuda: $ pip3 install --no-deps -U pycuda
# on Windows: make sure your Visual Studio C++ compiler is available in PATH
# PATH should have something like C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\bin\


@@ -1,33 +0,0 @@
#!/bin/bash
#SBATCH -p dgx2q # partition (GPU queue)
#SBATCH -N 1 # number of nodes
#SBATCH -n 1 # number of cores
#SBATCH -w g001 # DGX-2 node
#SBATCH --gres=gpu:1 # number of V100s
#SBATCH -t 0-00:10 # time (D-HH:MM)
#SBATCH -o slurm.%N.%j.out # STDOUT
#SBATCH -e slurm.%N.%j.err # STDERR
ulimit -s 10240
module load slurm
module load cuda10.1/toolkit/10.1.243
# Check how many GPUs your job got
#nvidia-smi
## Copy input files to the work directory:
rm -rf /work/$USER/ShallowWaterGPU
mkdir -p /work/$USER/ShallowWaterGPU
cp -r . /work/$USER/ShallowWaterGPU
# Run job
# (Assumes Miniconda is installed in user root dir.)
cd /work/$USER/ShallowWaterGPU
nvprof -o profiler_output $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 shmemTesting.py
cd $HOME/src/ShallowWaterGPU
## Copy files from work directory:
# (NOTE: Copying is not performed if the job fails!)
cp /work/$USER/ShallowWaterGPU/*.log .
cp /work/$USER/ShallowWaterGPU/*.nc .
cp /work/$USER/ShallowWaterGPU/profiler_output .


@@ -1,35 +0,0 @@
#!/bin/bash
# See http://wiki.ex3.simula.no before changing the values below
#SBATCH -p dgx2q # partition (GPU queue)
#SBATCH -N 1 # number of nodes
#SBATCH -n 4 # number of cores
#SBATCH -w g001 # DGX-2 node
#SBATCH --gres=gpu:4 # number of V100s
#SBATCH -t 0-00:10 # time (D-HH:MM)
#SBATCH -o slurm.%N.%j.out # STDOUT
#SBATCH -e slurm.%N.%j.err # STDERR
ulimit -s 10240
module load slurm
module load openmpi/4.0.1
module load cuda10.1/toolkit/10.1.243
# Check how many GPUs your job got
#nvidia-smi
## Copy input files to the work directory:
rm -rf /work/$USER/ShallowWaterGPU
mkdir -p /work/$USER/ShallowWaterGPU
cp -r . /work/$USER/ShallowWaterGPU
# Run job
# (Assumes Miniconda is installed in user root dir.)
cd /work/$USER/ShallowWaterGPU
mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py
cd $HOME/src/ShallowWaterGPU
## Copy files from work directory:
# (NOTE: Copying is not performed if the job fails!)
cp /work/$USER/ShallowWaterGPU/*.log .
cp /work/$USER/ShallowWaterGPU/*.nc .


@@ -1,59 +0,0 @@
#!/bin/bash
# See http://wiki.ex3.simula.no before changing the values below
#SBATCH -p dgx2q # partition (GPU queue)
#SBATCH -w g001 # DGX-2 node
##SBATCH --gres=gpu:4 # number of V100s
#SBATCH -t 0-00:10 # time (D-HH:MM)
#SBATCH -o slurm.%N.%j.out # STDOUT
#SBATCH -e slurm.%N.%j.err # STDERR
#SBATCH --reservation=martinls_17
# For Linux 64, Open MPI is built with CUDA awareness but this support is disabled by default.
# To enable it, please set the environment variable OMPI_MCA_opal_cuda_support=true before
# launching your MPI processes. Equivalently, you can set the MCA parameter in the command line:
# mpiexec --mca opal_cuda_support 1 ...
#
# In addition, UCX support is built but disabled by default.
# To enable it, first install UCX (conda install -c conda-forge ucx). Then, set the environment
# variables OMPI_MCA_pml="ucx" OMPI_MCA_osc="ucx" before launching your MPI processes.
# Equivalently, you can set the MCA parameters in the command line:
# mpiexec --mca pml ucx --mca osc ucx ...
# Note that you might also need to set UCX_MEMTYPE_CACHE=n for CUDA awareness via UCX.
# Please consult UCX's documentation for details.
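# A hedged, consolidated illustration of the two options described above, using only the
# commands already named in the comments (this script itself uses the environment-variable
# form further down; the UCX variant is shown commented out):
#   export OMPI_MCA_opal_cuda_support=true           # environment-variable form
#   mpiexec --mca opal_cuda_support 1 ...             # equivalent MCA parameter on the command line
#   # UCX variant (after 'conda install -c conda-forge ucx'):
#   # export OMPI_MCA_pml="ucx" OMPI_MCA_osc="ucx" UCX_MEMTYPE_CACHE=n
#   # mpiexec --mca pml ucx --mca osc ucx ...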
ulimit -s 10240
module load slurm/20.02.7
module load cuda11.2/toolkit/11.2.2
module load openmpi4-cuda11.2-ofed50-gcc8/4.1.0
# Check how many GPUs your job got
#nvidia-smi
mkdir -p output_dgx-2/$NOW
## Copy input files to the work directory:
mkdir -p /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
cp -r . /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
# Run job
# (Assumes Miniconda is installed in user root dir.)
cd /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
#mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
#nsys profile -t nvtx,cuda mpirun -np $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
#mpirun -np $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
export OMPI_MCA_opal_cuda_support=true
mpirun -np $SLURM_NTASKS $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
cd $HOME/src/ShallowWaterGPU
## Copy files from work directory:
# (NOTE: Copying is not performed if the job fails!)
mkdir -p output_dgx-2/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.log ./output_dgx-2/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.nc ./output_dgx-2/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.json ./output_dgx-2/$NOW
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.qdrep ./output_dgx-2/$NOW
rm -rf /work/$USER/$SLURM_JOB_ID


@@ -1,73 +0,0 @@
#!/bin/bash
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-16 GPUs
#sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=4096,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=2731,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=2048,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=1638,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=1365,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=1170,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=1024,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#
#sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=8192,NY=910,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=8192,NY=819,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=8192,NY=745,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=8192,NY=683,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=8192,NY=630,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=8192,NY=585,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=8192,NY=546,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=8192,NY=512,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
# one node: 4-16 GPUs
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=41984,NY=10496,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=41984,NY=8396,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=41984,NY=6997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=41984,NY=5997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=41984,NY=5248,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#
#sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=41984,NY=4664,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=41984,NY=4198,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=41984,NY=3816,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=41984,NY=3498,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=41984,NY=3229,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=41984,NY=2998,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=41984,NY=2798,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=41984,NY=2624,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
# one node: 1-16 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=22528,NY=11264,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=22528,NY=7509,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=22528,NY=5632,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=22528,NY=4505,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=22528,NY=3754,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=22528,NY=3218,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=22528,NY=2816,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=22528,NY=2503,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=22528,NY=2252,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=22528,NY=2048,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=22528,NY=1877,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=22528,NY=1732,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=22528,NY=1609,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=22528,NY=1501,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=22528,NY=1408,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
# one node: 4-16 GPUs
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=45056,NY=11264,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=45056,NY=8396,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=45056,NY=6997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=45056,NY=5997,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=45056,NY=5248,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=45056,NY=4664,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=45056,NY=4198,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=45056,NY=3816,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=45056,NY=3498,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=45056,NY=3229,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=45056,NY=2998,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=45056,NY=2798,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=45056,NY=2624,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
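# Added observation (not part of the original script): in the 1-16 GPU block above, NY
# appears to be the fixed total of 22528 integer-divided by the GPU count, so NX stays
# constant while NY shrinks with the number of ranks. Assuming that reading is correct,
# the values can be reproduced with:
#   for g in 1 2 4 8 16; do echo "$g GPUs -> NY=$((22528 / g))"; done
#   # prints 22528, 11264, 5632, 2816 and 1408, matching the sbatch lines above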


@@ -1,41 +0,0 @@
#!/bin/bash
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-16 GPUs
#sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#
#sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
# one node: 1-16 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=9 --ntasks-per-node=9 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=10 --ntasks-per-node=10 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=11 --ntasks-per-node=11 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=12 --ntasks-per-node=12 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=13 --ntasks-per-node=13 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=14 --ntasks-per-node=14 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=15 --ntasks-per-node=15 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=16 --ntasks-per-node=16 --export=ALL,NX=22528,NY=22528,NOW=$TIMESTAMP dgx-2_scaling_benchmark.job


@@ -1,58 +0,0 @@
#!/bin/bash
# See http://wiki.ex3.simula.no before changing the values below
#SBATCH -p hgx2q # partition (GPU queue)
#SBATCH -w g002 # HGX node
#SBATCH -t 0-00:10 # time (D-HH:MM)
#SBATCH -o slurm.%N.%j.out # STDOUT
#SBATCH -e slurm.%N.%j.err # STDERR
#SBATCH --reservation=martinls_11
# For Linux 64, Open MPI is built with CUDA awareness but this support is disabled by default.
# To enable it, please set the environment variable OMPI_MCA_opal_cuda_support=true before
# launching your MPI processes. Equivalently, you can set the MCA parameter in the command line:
# mpiexec --mca opal_cuda_support 1 ...
#
# In addition, UCX support is built but disabled by default.
# To enable it, first install UCX (conda install -c conda-forge ucx). Then, set the environment
# variables OMPI_MCA_pml="ucx" OMPI_MCA_osc="ucx" before launching your MPI processes.
# Equivalently, you can set the MCA parameters in the command line:
# mpiexec --mca pml ucx --mca osc ucx ...
# Note that you might also need to set UCX_MEMTYPE_CACHE=n for CUDA awareness via UCX.
# Please consult UCX's documentation for details.
ulimit -s 10240
module load slurm/20.02.7
module load cuda11.2/toolkit/11.2.2
module load openmpi4-cuda11.2-ofed50-gcc8/4.1.0
# Check how many GPUs your job got
#nvidia-smi
mkdir -p output_hgx/$NOW
## Copy input files to the work directory:
mkdir -p /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
cp -r . /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
# Run job
# (Assumes Miniconda is installed in user root dir.)
cd /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
#mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
#nsys profile -t nvtx,cuda mpirun -np $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
#mpirun -np $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
export OMPI_MCA_opal_cuda_support=true
mpirun -np $SLURM_NTASKS $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
cd $HOME/src/ShallowWaterGPU
## Copy files from work directory:
# (NOTE: Copying is not performed if the job fails!)
mkdir -p output_hgx/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.log ./output_hgx/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.nc ./output_hgx/$NOW/$SLURM_JOB_ID
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.json ./output_hgx/$NOW
mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.qdrep ./output_hgx/$NOW
rm -rf /work/$USER/$SLURM_JOB_ID


@@ -1,20 +0,0 @@
#!/bin/bash
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-8 GPUs
#sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=4096,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=2731,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=2048,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=1638,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=1365,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=1170,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=1024,NOW=$TIMESTAMP hgx_scaling_benchmark.job
# one node: 4-8 GPUs
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=41984,NY=10496,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=41984,NY=8396,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=41984,NY=6997,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=41984,NY=5997,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=41984,NY=5248,NOW=$TIMESTAMP hgx_scaling_benchmark.job


@@ -1,23 +0,0 @@
#!/bin/bash
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-8 GPUs
#sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=8192,NY=8192,NOW=$TIMESTAMP hgx_scaling_benchmark.job
# one node: 1-8 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job
sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP hgx_scaling_benchmark.job


@@ -1,8 +0,0 @@
#!/bin/bash
module purge
module load git/2.21.0 hdf5/1.10.5-gcc cuda/10.1
conda activate ShallowWaterGPU_HPC
python mpiTesting.py


@@ -1,54 +0,0 @@
#!/bin/bash
# Job name:
#SBATCH --job-name=ShallowWaterGPUScalingDev
#
# Project:
#SBATCH --account=nn9882k
#
# Wall clock limit:
#SBATCH --time=00:02:00
#
# NOTE: See https://documentation.sigma2.no/jobs/projects_accounting.html when adjusting the values below
#
# Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
# device(s) to use. It will have values '0', '1' or '0,1' corresponding to
# /dev/nvidia0, /dev/nvidia1 or both, respectively.
#SBATCH --partition=accel
#
# Max memory usage per task (core) - increasing this will cost more core hours:
#SBATCH --mem-per-cpu=3800M
#
# Number of tasks:
#SBATCH --nodes=1 --gpus-per-node=1 --ntasks-per-node=1
#
#SBATCH --qos=devel
## Set up job environment: (this is done automatically behind the scenes)
## (make sure to comment out or remove the following 'source ...' line)
# source /cluster/bin/jobsetup
module restore system # instead of 'module purge', reset the module environment to the system default
module load CUDA/11.4.1
# It is also recommended to list the loaded modules, for easier debugging:
module list
set -o errexit # exit on errors
set -o nounset # Treat unset variables as errors (makes it easier to discover issues in your batch script)
## Copy input files to the work directory:
mkdir $SCRATCH/ShallowWaterGPU
cp -r . $SCRATCH/ShallowWaterGPU
## Make sure the results are copied back to the submit directory (see Work Directory below):
# chkfile MyResultFile
# chkfile is replaced by 'savefile' on Saga
savefile "$SCRATCH/ShallowWaterGPU/*.log"
savefile "$SCRATCH/ShallowWaterGPU/*.nc"
savefile "$SCRATCH/ShallowWaterGPU/*.json"
## Do some work:
cd $SCRATCH/ShallowWaterGPU
srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx 1024 -ny 1024 --profile


@@ -1,52 +0,0 @@
#!/bin/bash
# Job name:
#SBATCH --job-name=ShallowWaterGPUStrongScaling
#
# Project:
#SBATCH --account=nn9882k
#
# Wall clock limit:
#SBATCH --time=24:00:00
#
# NOTE: See https://documentation.sigma2.no/jobs/projects_accounting.html when adjusting the values below
#
# Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
# device(s) to use. It will have values '0', '1' or '0,1' corresponding to
# /dev/nvidia0, /dev/nvidia1 or both, respectively.
#SBATCH --partition=accel
#
# Max memory usage per task (core) - increasing this will cost more core hours:
#SBATCH --mem-per-cpu=3800M
#
# Number of tasks:
#SBATCH --nodes=1 --gpus-per-node=1 --ntasks-per-node=1
## Set up job environment: (this is done automatically behind the scenes)
## (make sure to comment out or remove the following 'source ...' line)
# source /cluster/bin/jobsetup
module restore system # instead of 'module purge', reset the module environment to the system default
module load CUDA/10.2.89
# It is also recommended to list the loaded modules, for easier debugging:
module list
set -o errexit # exit on errors
set -o nounset # Treat unset variables as errors (makes it easier to discover issues in your batch script)
## Copy input files to the work directory:
mkdir $SCRATCH/ShallowWaterGPU
cp -r . $SCRATCH/ShallowWaterGPU
## Make sure the results are copied back to the submit directory (see Work Directory below):
# chkfile MyResultFile
# chkfile is replaced by 'savefile' on Saga
savefile "$SCRATCH/ShallowWaterGPU/*.log"
savefile "$SCRATCH/ShallowWaterGPU/*.nc"
savefile "$SCRATCH/ShallowWaterGPU/*.json"
## Do some work:
cd $SCRATCH/ShallowWaterGPU
srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx 8192 -ny 8192 --profile


@@ -1,65 +0,0 @@
#!/bin/bash
# Job name:
#SBATCH --job-name=ShallowWaterGPUScaling
#
# Project:
#SBATCH --account=nn9882k
#
# Wall clock limit:
#SBATCH --time=00:10:00
#
# NOTE: See https://documentation.sigma2.no/jobs/projects_accounting.html when adjusting the values below
#
# Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
# device(s) to use. It will have values '0', '1' or '0,1' corresponding to
# /dev/nvidia0, /dev/nvidia1 or both, respectively.
#SBATCH --partition=accel
#
# Max memory usage per task (core) - increasing this will cost more core hours:
##SBATCH --mem-per-cpu=3800M
#SBATCH --mem-per-cpu=24G
#
#SBATCH --qos=devel
## Set up job environment: (this is done automatically behind the scenes)
## (make sure to comment out or remove the following 'source ...' line)
# source /cluster/bin/jobsetup
module restore system # instead of 'module purge', reset the module environment to the system default
module load CUDA/11.4.1
#module load CUDA/11.1.1-GCC-10.2.0
#module load OpenMPI/4.0.5-gcccuda-2020b
# It is also recommended to list the loaded modules, for easier debugging:
module list
set -o errexit # exit on errors
set -o nounset # Treat unset variables as errors (makes it easier to discover issues in your batch script)
## Copy input files to the work directory:
mkdir $SCRATCH/ShallowWaterGPU
cp -r . $SCRATCH/ShallowWaterGPU
## Make sure the results are copied back to the submit directory (see Work Directory below):
# chkfile MyResultFile
# chkfile is replaced by 'savefile' on Saga
#savefile "$SCRATCH/ShallowWaterGPU/*.log"
#savefile "$SCRATCH/ShallowWaterGPU/*.nc"
#savefile "$SCRATCH/ShallowWaterGPU/*.json"
#savefile "$SCRATCH/ShallowWaterGPU/*.qdrep"
cleanup "rm -rf $SCRATCH/ShallowWaterGPU"
export OMPI_MCA_opal_cuda_support=true
## Do some work:
cd $SCRATCH/ShallowWaterGPU
srun /cluster/projects/nn9882k/martinls/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
srun /cluster/projects/nn9882k/martinls/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
cd $HOME/src/ShallowWaterGPU
mkdir -p output_saga/$NOW/$SLURM_JOB_ID
mv $SCRATCH/ShallowWaterGPU/*.log ./output_saga/$NOW/$SLURM_JOB_ID
mv $SCRATCH/ShallowWaterGPU/*.nc ./output_saga/$NOW/$SLURM_JOB_ID
mv $SCRATCH/ShallowWaterGPU/*.json ./output_saga/$NOW
mv $SCRATCH/ShallowWaterGPU/*.qdrep ./output_saga/$NOW


@@ -1,30 +0,0 @@
#!/bin/bash
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-4 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP saga_scaling_benchmark.job # 1 rank
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=20480,NY=10240,NOW=$TIMESTAMP saga_scaling_benchmark.job # 2 ranks
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=20480,NY=6826,NOW=$TIMESTAMP saga_scaling_benchmark.job # 3 ranks
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=20480,NY=5120,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks
# 4 nodes: 1-4 GPUs per node
sbatch --nodes=4 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=20480,NY=5120,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks
sbatch --nodes=4 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=20480,NY=2560,NOW=$TIMESTAMP saga_scaling_benchmark.job # 8 ranks
sbatch --nodes=4 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=20480,NY=1706,NOW=$TIMESTAMP saga_scaling_benchmark.job # 12 ranks
sbatch --nodes=4 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=20480,NY=1280,NOW=$TIMESTAMP saga_scaling_benchmark.job # 16 ranks
# 4 nodes: 1-4 GPUs per node
sbatch --nodes=4 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=40960,NY=10240,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks
sbatch --nodes=4 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=40960,NY=5120,NOW=$TIMESTAMP saga_scaling_benchmark.job # 8 ranks
sbatch --nodes=4 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=40960,NY=3413,NOW=$TIMESTAMP saga_scaling_benchmark.job # 12 ranks
sbatch --nodes=4 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=40960,NY=2560,NOW=$TIMESTAMP saga_scaling_benchmark.job # 16 ranks
## one node: 1-4 GPUs
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=24576,NY=6144,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks
#
## 4 nodes: 1-4 GPUs per node
#sbatch --nodes=4 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=24576,NY=6144,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks
#sbatch --nodes=4 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=24576,NY=3072,NOW=$TIMESTAMP saga_scaling_benchmark.job # 8 ranks
#sbatch --nodes=4 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=24576,NY=2048,NOW=$TIMESTAMP saga_scaling_benchmark.job # 12 ranks
#sbatch --nodes=4 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=24576,NY=1536,NOW=$TIMESTAMP saga_scaling_benchmark.job # 16 ranks


@@ -1,25 +0,0 @@
#!/bin/bash
TIMESTAMP=$(date "+%Y-%m-%dT%H%M%S")
# one node: 1-4 GPUs
sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP saga_scaling_benchmark.job # 1 rank
sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP saga_scaling_benchmark.job # 2 ranks
sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP saga_scaling_benchmark.job # 3 ranks
sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks
# 2-4 nodes: 1 GPUs per node
sbatch --nodes=2 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP saga_scaling_benchmark.job # 2 ranks
sbatch --nodes=3 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP saga_scaling_benchmark.job # 3 ranks
sbatch --nodes=4 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=20480,NY=20480,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks
## one node: 1-4 GPUs
#sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=12288,NY=12288,NOW=$TIMESTAMP saga_scaling_benchmark.job # 1 ranks
#sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=12288,NY=12288,NOW=$TIMESTAMP saga_scaling_benchmark.job # 2 ranks
#sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=12288,NY=12288,NOW=$TIMESTAMP saga_scaling_benchmark.job # 3 ranks
#sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=12288,NY=12288,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks
## 2-4 nodes: 1 GPUs per node
#sbatch --nodes=2 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=12288,NY=12288,NOW=$TIMESTAMP saga_scaling_benchmark.job # 2 ranks
#sbatch --nodes=3 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=12288,NY=12288,NOW=$TIMESTAMP saga_scaling_benchmark.job # 3 ranks
#sbatch --nodes=4 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=12288,NY=12288,NOW=$TIMESTAMP saga_scaling_benchmark.job # 4 ranks


@@ -1,39 +0,0 @@
#!/bin/bash
NOW=$(date "+%Y-%m-%dT%H%M%S")
mkdir -p output_seymour/$NOW
# one node: 1-8 GPUs
mpiexec -n 1 python mpiTesting.py -nx 8192 -ny 8192 --profile &&
mkdir -p output_seymour/$NOW/1_proc &&
mv *.log output_seymour/$NOW/1_proc/ && mv *.nc output_seymour/$NOW/1_proc/ &&
mpiexec -n 2 python mpiTesting.py -nx 8192 -ny 4096 --profile &&
mkdir -p output_seymour/$NOW/2_proc &&
mv *.log output_seymour/$NOW/2_proc/ && mv *.nc output_seymour/$NOW/2_proc/ &&
mpiexec -n 3 python mpiTesting.py -nx 8192 -ny 2731 --profile &&
mkdir -p output_seymour/$NOW/3_proc &&
mv *.log output_seymour/$NOW/3_proc/ && mv *.nc output_seymour/$NOW/3_proc/ &&
mpiexec -n 4 python mpiTesting.py -nx 8192 -ny 2048 --profile &&
mkdir -p output_seymour/$NOW/4_proc &&
mv *.log output_seymour/$NOW/4_proc/ && mv *.nc output_seymour/$NOW/4_proc/ &&
mpiexec -n 5 python mpiTesting.py -nx 8192 -ny 1638 --profile &&
mkdir -p output_seymour/$NOW/5_proc &&
mv *.log output_seymour/$NOW/5_proc/ && mv *.nc output_seymour/$NOW/5_proc/ &&
mpiexec -n 6 python mpiTesting.py -nx 8192 -ny 1365 --profile &&
mkdir -p output_seymour/$NOW/6_proc &&
mv *.log output_seymour/$NOW/6_proc/ && mv *.nc output_seymour/$NOW/6_proc/ &&
mpiexec -n 7 python mpiTesting.py -nx 8192 -ny 1170 --profile &&
mkdir -p output_seymour/$NOW/7_proc &&
mv *.log output_seymour/$NOW/7_proc/ && mv *.nc output_seymour/$NOW/7_proc/ &&
mpiexec -n 8 python mpiTesting.py -nx 8192 -ny 1024 --profile &&
mkdir -p output_seymour/$NOW/8_proc &&
mv *.log output_seymour/$NOW/8_proc/ && mv *.nc output_seymour/$NOW/8_proc/ &&
for filename in *.json; do mv "$filename" "output_seymour/$NOW/MPI_${NOW}_${filename#????}"; done;
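# Added comment for clarity: "${filename#????}" strips the first four characters of each
# JSON filename (presumably an existing "MPI_" prefix) before it is re-prefixed with
# "MPI_${NOW}_", so the profiling JSON files end up timestamped under output_seymour/$NOW.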