Mirror of https://github.com/smyalygames/FiniteVolumeGPU.git
Synced 2025-05-17 22:14:14 +02:00

Commit d83d620512 (parent 80d84e0489)
Profiling code for DGX-2. Added fixed dt.
File diff suppressed because one or more lines are too long
@@ -89,7 +89,7 @@ def toJson(in_dict, compressed=True):
             out_dict[key] = value
     return json.dumps(out_dict)
 
-def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names=[]):
+def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names=[], dt=None):
     """
     Runs a simulation, and stores output in netcdf file. Stores the times given in
     save_times, and saves all of the variables in list save_var_names. Elements in
@@ -176,7 +176,7 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
 
         #Simulate
         if (t_step > 0.0):
-            sim.simulate(t_step)
+            sim.simulate(t_step, dt)
 
         profiling_data_sim_runner["end"]["t_step"] += time.time()
 
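The two hunks above change the Common.runSimulation helper: it gains an optional dt argument (default None) and forwards it to sim.simulate, so a caller can force a fixed time step instead of letting the simulator choose one. A minimal sketch of that pattern, with simplified names and without the repository's netCDF output and profiling timers:

# Sketch only: illustrates how the optional fixed dt is threaded through the
# runner. With dt=None the simulator is assumed to fall back to its own
# (e.g. CFL-based) step size; with dt set, every internal step uses that value.
def run_to_save_times(sim, save_times, dt=None):
    t_prev = 0.0
    for t in save_times:
        t_step = t - t_prev          # advance up to the next output time
        if t_step > 0.0:
            sim.simulate(t_step, dt)
        t_prev = t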
dgx-2_strong_scaling_benchmark.job (new file, 36 lines)
@@ -0,0 +1,36 @@
+#!/bin/bash
+# See http://wiki.ex3.simula.no before changing the values below
+#SBATCH -p dgx2q            # partition (GPU queue)
+#SBATCH -w g001             # DGX-2 node
+##SBATCH --gres=gpu:4       # number of V100s
+#SBATCH -t 0-00:10          # time (D-HH:MM)
+#SBATCH -o slurm.%N.%j.out  # STDOUT
+#SBATCH -e slurm.%N.%j.err  # STDERR
+
+ulimit -s 10240
+module load slurm/20.02.7
+module load cuda11.2/toolkit/11.2.2
+module load openmpi4-cuda11.2-ofed50-gcc8/4.1.0
+
+# Check how many GPUs your job got
+#nvidia-smi
+
+## Copy input files to the work directory:
+mkdir -p /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
+cp -r . /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
+
+# Run job
+# (Assumes Miniconda is installed in the user's home directory.)
+cd /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU
+#mpirun --mca btl_openib_if_include mlx5_0 --mca btl_openib_warn_no_device_params_found 0 $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
+mpirun -np $SLURM_NTASKS numactl --cpunodebind=0 --localalloc $HOME/miniconda3/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
+cd $HOME/src/ShallowWaterGPU
+
+## Copy files from the work directory:
+# (NOTE: Copying is not performed if the job fails!)
+mkdir -p output/$SLURM_JOB_ID
+mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.log ./output/$SLURM_JOB_ID
+mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.nc ./output/$SLURM_JOB_ID
+mv /work/$USER/$SLURM_JOB_ID/ShallowWaterGPU/*.json .
+
+rm -rf /work/$USER/$SLURM_JOB_ID
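The job script takes NX and NY from the values exported at submission time (see the driver script below) and the task count and job id from Slurm itself. A hedged illustration of reading such values on the Python side; read_job_env is a hypothetical helper, not part of mpiTesting.py, which only reads SLURM_JOB_ID and SLURM_JOB_NUM_NODES from the environment and gets nx/ny as command-line arguments:

import os

# Hypothetical helper (not in the repository): gather the Slurm/job settings
# this benchmark relies on, with fallbacks for interactive runs.
def read_job_env():
    return {
        "job_id":  os.environ.get("SLURM_JOB_ID", "interactive"),
        "n_tasks": int(os.environ.get("SLURM_NTASKS", "1")),
        "nx":      int(os.environ.get("NX", "4096")),
        "ny":      int(os.environ.get("NY", "4096")),
    }

print(read_job_env())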
dgx-2_strong_scaling_benchmark.sh (new file, 11 lines)
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# one node: 1-8 GPUs
+sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=4096,NY=4096 dgx-2_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=4096,NY=2048 dgx-2_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=4096,NY=1365 dgx-2_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=4096,NY=1024 dgx-2_strong_scaling_benchmark.job
+#sbatch --nodes=1 --gpus-per-node=5 --ntasks-per-node=5 --export=ALL,NX=4096,NY=819 dgx-2_strong_scaling_benchmark.job
+#sbatch --nodes=1 --gpus-per-node=6 --ntasks-per-node=6 --export=ALL,NX=4096,NY=683 dgx-2_strong_scaling_benchmark.job
+#sbatch --nodes=1 --gpus-per-node=7 --ntasks-per-node=7 --export=ALL,NX=4096,NY=585 dgx-2_strong_scaling_benchmark.job
+#sbatch --nodes=1 --gpus-per-node=8 --ntasks-per-node=8 --export=ALL,NX=4096,NY=512 dgx-2_strong_scaling_benchmark.job
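Each submission divides NY so the total work stays at roughly 4096 × 4096 cells regardless of how many GPUs are used, which is what makes this a strong-scaling benchmark. A small sketch, assuming NY is simply the rounded per-rank share of a 4096-row domain, reproduces the values above:

# Sketch: reproduce the NY values used in the sbatch lines, assuming the
# 4096-row domain is divided evenly among the MPI ranks (one rank per GPU).
NX, TOTAL_NY = 4096, 4096
for n_gpus in range(1, 9):
    ny = round(TOTAL_NY / n_gpus)
    print(f"{n_gpus} GPU(s): NX={NX}, NY={ny}")
# -> 4096, 2048, 1365, 1024, 819, 683, 585, 512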
@@ -110,8 +110,10 @@ logger.info("Generating initial conditions")
 nx = args.nx
 ny = args.ny
 
+dt = 0.00001
+
 gamma = 1.4
-save_times = np.linspace(0, 0.5, 2)
+save_times = np.linspace(0, 0.1, 2)
 outfile = "mpi_out_" + str(MPI.COMM_WORLD.rank) + ".nc"
 save_var_names = ['rho', 'rho_u', 'rho_v', 'E']
 
@@ -139,7 +141,7 @@ def genSim(grid, **kwargs):
 
 
 outfile, sim_runner_profiling_data, sim_profiling_data = Common.runSimulation(
-    genSim, arguments, outfile, save_times, save_var_names)
+    genSim, arguments, outfile, save_times, save_var_names, dt)
 
 if(args.profile):
     t_total_end = time.time()
@@ -149,6 +151,7 @@ if(args.profile):
 
 # write profiling to json file
 if(args.profile and MPI.COMM_WORLD.rank == 0):
     job_id = ""
     if "SLURM_JOB_ID" in os.environ:
         job_id = int(os.environ["SLURM_JOB_ID"])
+        allocated_nodes = int(os.environ["SLURM_JOB_NUM_NODES"])
@@ -167,8 +170,13 @@ if(args.profile and MPI.COMM_WORLD.rank == 0):
 
     profiling_data["nx"] = nx
     profiling_data["ny"] = ny
+    profiling_data["dt"] = dt
+    profiling_data["n_time_steps"] = sim_profiling_data["n_time_steps"]
 
+    profiling_data["slurm_job_id"] = job_id
+    profiling_data["n_cuda_devices"] = str(num_cuda_devices)
+    profiling_data["n_processes"] = str(MPI.COMM_WORLD.size)
 
     with open(profiling_file, "w") as write_file:
         json.dump(profiling_data, write_file)
 
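With these additions, the profiling JSON written by rank 0 records the fixed dt, the number of time steps, and the Slurm/GPU configuration alongside the timing data, so strong-scaling results can be matched to the job that produced them. A sketch of reading one such file back; the field names follow the assignments above, while the file name is an assumption (the job script moves *.json files back to the source directory):

import json

# Sketch: load one profiling file produced by a --profile run.
with open("some_profiling_run.json") as f:
    data = json.load(f)

print("job", data["slurm_job_id"], "used", data["n_cuda_devices"], "GPU(s) /",
      data["n_processes"], "process(es):", data["nx"], "x", data["ny"],
      "cells,", data["n_time_steps"], "steps at dt =", data["dt"])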