Mirror of https://github.com/smyalygames/FiniteVolumeGPU.git

commit 9c4de881a8 (parent 11223f0890)

    Setup for scaling experiments on Saga.

    The mpiTesting.py driver gains -nx/-ny/--profile command-line options and, with --profile,
    writes the total run time to a JSON file on rank 0. A devel-QOS job script (saga-dev.job),
    a strong-scaling job script (saga_strong_scaling_benchmark.job) and an sbatch wrapper
    (saga_strong_scaling_benchmark.sh) are added, and the existing Saga job script is updated
    for the 8192x8192 profiling run.
mpiTesting.py (modified):

@@ -25,29 +25,43 @@ import gc
 import time
 import json
 import logging
+import os
 
-#MPI
+# MPI
 from mpi4py import MPI
 
-#CUDA
+# CUDA
 import pycuda.driver as cuda
 
-#Simulator engine etc
+# Simulator engine etc
 from GPUSimulators import MPISimulator, Common, CudaContext
 from GPUSimulators import EE2D_KP07_dimsplit
 from GPUSimulators.helpers import InitialConditions as IC
 from GPUSimulators.Simulator import BoundaryCondition as BC
 
+import argparse
+parser = argparse.ArgumentParser(description='Strong and weak scaling experiments.')
+parser.add_argument('-nx', type=int, default=128)
+parser.add_argument('-ny', type=int, default=128)
+parser.add_argument('--profile', action='store_true')  # default: False
 
-#Get MPI COMM to use
+args = parser.parse_args()
+
+if(args.profile):
+    # profiling: total run time
+    t_total_start = time.time()
+
+
+# Get MPI COMM to use
 comm = MPI.COMM_WORLD
 
 ####
-#Initialize logging
+# Initialize logging
 ####
 log_level_console = 20
 log_level_file = 10
 log_filename = 'mpi_' + str(comm.rank) + '.log'
 logger = logging.getLogger('GPUSimulators')
 logger.setLevel(min(log_level_console, log_level_file))
@@ -55,15 +69,17 @@ logger.setLevel(min(log_level_console, log_level_file))
 ch = logging.StreamHandler()
 ch.setLevel(log_level_console)
 logger.addHandler(ch)
-logger.info("Console logger using level %s", logging.getLevelName(log_level_console))
+logger.info("Console logger using level %s",
+            logging.getLevelName(log_level_console))
 
 fh = logging.FileHandler(log_filename)
-formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s: %(message)s')
+formatter = logging.Formatter(
+    '%(asctime)s:%(name)s:%(levelname)s: %(message)s')
 fh.setFormatter(formatter)
 fh.setLevel(log_level_file)
 logger.addHandler(fh)
-logger.info("File logger using level %s to %s", logging.getLevelName(log_level_file), log_filename)
+logger.info("File logger using level %s to %s",
+            logging.getLevelName(log_level_file), log_filename)
 
 ####
@@ -73,7 +89,6 @@ logger.info("Creating MPI grid")
 grid = MPISimulator.MPIGrid(MPI.COMM_WORLD)
 
 
-
 ####
 # Initialize CUDA
 ####
@@ -85,15 +100,15 @@ cuda_device = local_rank % num_cuda_devices
 cuda_context = CudaContext.CudaContext(device=cuda_device, autotuning=False)
 
 
 ####
 # Set initial conditions
 ####
 logger.info("Generating initial conditions")
-nx = 128
-ny = 128
+nx = args.nx
+ny = args.ny
 
 gamma = 1.4
-save_times = np.linspace(0, 5.0, 10)
+save_times = np.linspace(0, 10.0, 2)
 outfile = "mpi_out_" + str(MPI.COMM_WORLD.rank) + ".nc"
 save_var_names = ['rho', 'rho_u', 'rho_v', 'E']
 
@@ -103,20 +118,43 @@ arguments['theta'] = 1.2
 arguments['grid'] = grid
 
 
 ####
 # Run simulation
 ####
 logger.info("Running simulation")
-#Helper function to create MPI simulator
+# Helper function to create MPI simulator
+
+
 def genSim(grid, **kwargs):
     local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)
     sim = MPISimulator.MPISimulator(local_sim, grid)
     return sim
-outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names)
+
+
+outfile = Common.runSimulation(
+    genSim, arguments, outfile, save_times, save_var_names)
+
+if(args.profile):
+    t_total_end = time.time()
+    t_total = t_total_end - t_total_start
+    print("Total run time on rank " + str(MPI.COMM_WORLD.rank) + " is " + str(t_total) + " s")
+
+# write profiling to json file
+if(args.profile and MPI.COMM_WORLD.rank == 0):
+    if "SLURM_JOB_ID" in os.environ:
+        job_id = int(os.environ["SLURM_JOB_ID"])
+        allocated_nodes = int(os.environ["SLURM_JOB_NUM_NODES"])
+        allocated_gpus = int(os.environ["CUDA_VISIBLE_DEVICES"].count(",") + 1)
+        profiling_file = "MPI_jobid_" + \
+            str(job_id) + "_" + str(allocated_nodes) + "_nodes_and_" + str(allocated_gpus) + "_GPUs_profiling.json"
+    else:
+        profiling_file = "MPI_test_profiling.json"
+
+    write_profiling_data = {}
+    write_profiling_data["total"] = t_total
+
+    with open(profiling_file, "w") as write_file:
+        json.dump(write_profiling_data, write_file)
 
 ####
 # Clean shutdown
@@ -134,4 +172,4 @@ gc.collect()
 # Print completion and exit
 ####
 print("Completed!")
-exit(0)
+exit(0)
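With --profile, rank 0 writes a small JSON file whose name encodes the SLURM job id, the node count and the number of GPUs visible on its node, and whose only key is "total" (the wall-clock run time in seconds). A minimal sketch of reading those files back for a scaling comparison; the glob pattern and regular expression follow the naming scheme in this diff, but the snippet itself is illustrative and not part of the commit:

import glob
import json
import re

# Collect the "total" run time from each profiling file, keyed by
# (nodes, GPUs per node) parsed out of the file name.
totals = {}
for path in sorted(glob.glob("MPI_jobid_*_profiling.json")):
    with open(path) as f:
        data = json.load(f)
    m = re.search(r"_(\d+)_nodes_and_(\d+)_GPUs_", path)
    if m:
        totals[(int(m.group(1)), int(m.group(2)))] = data["total"]

for (nodes, gpus), seconds in sorted(totals.items()):
    print(f"{nodes} node(s) x {gpus} GPU(s): {seconds:.1f} s")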
saga-dev.job (new file, 53 lines):

#!/bin/bash
# Job name:
#SBATCH --job-name=ShallowWaterGPUScalingDev
#
# Project:
#SBATCH --account=nn9550k
#
# Wall clock limit:
#SBATCH --time=01:00:00
#
# Ask for 1 GPU (max is 2)
# Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
# device(s) to use. It will have values '0', '1' or '0,1' corresponding to
# /dev/nvidia0, /dev/nvidia1 or both, respectively.
#SBATCH --partition=accel --gres=gpu:1
#
# Max memory usage per task (core) - increasing this will cost more core hours:
#SBATCH --mem-per-cpu=16G
#
# Number of tasks:
#SBATCH --nodes=1 --ntasks-per-node=1
#
#SBATCH --qos=devel

## Set up job environment: (this is done automatically behind the scenes)
## (make sure to comment '#' or remove the following line 'source ...')
# source /cluster/bin/jobsetup

module restore system   # instead of 'module purge' rather set module environment to the system default
module load CUDA/10.2.89

# It is also recommended to list loaded modules, for easier debugging:
module list

set -o errexit  # exit on errors
set -o nounset  # Treat unset variables as errors (added for more easily discovering issues in your batch script)

## Copy input files to the work directory:
mkdir $SCRATCH/ShallowWaterGPU
cp -r . $SCRATCH/ShallowWaterGPU

## Make sure the results are copied back to the submit directory (see Work Directory below):
# chkfile MyResultFile
# chkfile is replaced by 'savefile' on Saga
savefile "$SCRATCH/ShallowWaterGPU/*.log"
savefile "$SCRATCH/ShallowWaterGPU/*.nc"
savefile "$SCRATCH/ShallowWaterGPU/*.json"

## Do some work:
cd $SCRATCH/ShallowWaterGPU
srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx 8192 -ny 8192 --profile
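The CUDA_VISIBLE_DEVICES note in the job script ties in with the driver's device selection (cuda_device = local_rank % num_cuda_devices) and with the allocated_gpus = CUDA_VISIBLE_DEVICES.count(",") + 1 bookkeeping in the profiling code. A small illustrative sketch of that mapping; how the driver actually obtains num_cuda_devices is not shown in this diff, so parsing the environment variable here is an assumption:

import os

def pick_device(local_rank, visible=None):
    # '0' -> 1 GPU, '0,1' -> 2 GPUs, ... (same counting as the profiling code)
    if visible is None:
        visible = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
    num_cuda_devices = visible.count(",") + 1
    return local_rank % num_cuda_devices

print(pick_device(0, "0"))        # --gres=gpu:1, 1 task/node: every rank uses device 0
print(pick_device(3, "0,1,2,3"))  # --gres=gpu:4, 4 tasks/node: local rank 3 uses device 3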
Existing Saga job script:

@@ -1,12 +1,12 @@
 #!/bin/bash
 # Job name:
-#SBATCH --job-name=saga-test
+#SBATCH --job-name=ShallowWaterGPUStrongScaling
 #
 # Project:
 #SBATCH --account=nn9550k
 #
 # Wall clock limit:
-#SBATCH --time=00:10:00
+#SBATCH --time=24:00:00
 #
 # Ask for 1 GPU (max is 2)
 # Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
@@ -15,10 +15,10 @@
 #SBATCH --partition=accel --gres=gpu:1
 #
 # Max memory usage per task (core) - increasing this will cost more core hours:
-#SBATCH --mem-per-cpu=4G
+#SBATCH --mem-per-cpu=16G
 #
 # Number of tasks:
-#SBATCH --nodes=2 --ntasks-per-node=1
+#SBATCH --nodes=1 --ntasks-per-node=1
 
 ## Set up job environment: (this is done automatically behind the scenes)
 ## (make sure to comment '#' or remove the following line 'source ...')
@@ -42,9 +42,10 @@ cp -r . $SCRATCH/ShallowWaterGPU
 # chkfile is replaced by 'savefile' on Saga
 savefile "$SCRATCH/ShallowWaterGPU/*.log"
 savefile "$SCRATCH/ShallowWaterGPU/*.nc"
+savefile "$SCRATCH/ShallowWaterGPU/*.json"
 
 ## Do some work:
 cd $SCRATCH/ShallowWaterGPU
-srun /cluster/home/$HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
-srun /cluster/home/$HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py
+srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
+srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx 8192 -ny 8192 --profile
saga_strong_scaling_benchmark.job (new file, 51 lines):

#!/bin/bash
# Job name:
#SBATCH --job-name=ShallowWaterGPUStrongScaling
#
# Project:
#SBATCH --account=nn9550k
#
# Wall clock limit:
#SBATCH --time=10:00:00
#
# Ask for 1 GPU (max is 2)
# Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
# device(s) to use. It will have values '0', '1' or '0,1' corresponding to
# /dev/nvidia0, /dev/nvidia1 or both, respectively.
#SBATCH --partition=accel --gres=gpu:1
#
# Max memory usage per task (core) - increasing this will cost more core hours:
#SBATCH --mem-per-cpu=16G
#
# Number of tasks:
#SBATCH --nodes=1 --ntasks-per-node=1

## Set up job environment: (this is done automatically behind the scenes)
## (make sure to comment '#' or remove the following line 'source ...')
# source /cluster/bin/jobsetup

module restore system   # instead of 'module purge' rather set module environment to the system default
module load CUDA/10.2.89

# It is also recommended to list loaded modules, for easier debugging:
module list

set -o errexit  # exit on errors
set -o nounset  # Treat unset variables as errors (added for more easily discovering issues in your batch script)

## Copy input files to the work directory:
mkdir $SCRATCH/ShallowWaterGPU
cp -r . $SCRATCH/ShallowWaterGPU

## Make sure the results are copied back to the submit directory (see Work Directory below):
# chkfile MyResultFile
# chkfile is replaced by 'savefile' on Saga
savefile "$SCRATCH/ShallowWaterGPU/*.log"
savefile "$SCRATCH/ShallowWaterGPU/*.nc"
savefile "$SCRATCH/ShallowWaterGPU/*.json"

## Do some work:
cd $SCRATCH/ShallowWaterGPU
srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx 1024 -ny 1024 --profile
saga_strong_scaling_benchmark.sh (new file, 13 lines):

#!/bin/bash

# one node: 1-4 tasks/GPUs
sbatch --partition=accel --gres=gpu:1 --nodes=1 --ntasks-per-node=1 saga_strong_scaling_benchmark.job
sbatch --partition=accel --gres=gpu:2 --nodes=1 --ntasks-per-node=2 saga_strong_scaling_benchmark.job
sbatch --partition=accel --gres=gpu:3 --nodes=1 --ntasks-per-node=3 saga_strong_scaling_benchmark.job
sbatch --partition=accel --gres=gpu:4 --nodes=1 --ntasks-per-node=4 saga_strong_scaling_benchmark.job

# 2-4 nodes: 4 tasks/GPUs per node
sbatch --partition=accel --gres=gpu:4 --nodes=2 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
sbatch --partition=accel --gres=gpu:4 --nodes=3 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
sbatch --partition=accel --gres=gpu:4 --nodes=4 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
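Together with saga_strong_scaling_benchmark.job, this wrapper sweeps the same -nx 1024 -ny 1024 problem over 1-4 GPUs on one node and 8-16 GPUs on 2-4 nodes, i.e. a strong-scaling experiment with a fixed problem size. A minimal sketch of the usual strong-scaling metrics, speedup S(p) = T(1)/T(p) and efficiency E(p) = S(p)/p, computed from the collected totals; the timing values below are placeholders, not measured results:

# Hypothetical per-run totals in seconds, keyed by the total number of GPUs
# (e.g. collected from the profiling JSON files written by mpiTesting.py).
totals = {1: 800.0, 2: 420.0, 4: 230.0, 8: 130.0, 16: 80.0}

t1 = totals[1]
for p in sorted(totals):
    speedup = t1 / totals[p]
    efficiency = speedup / p
    print(f"{p:2d} GPUs: speedup {speedup:5.2f}x, efficiency {efficiency:6.1%}")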