Mirror of https://github.com/smyalygames/FiniteVolumeGPU.git, synced 2025-05-18 06:24:13 +02:00

Commit 80d84e0489: Merge branch 'master' of github.com:setmar/ShallowWaterGPU

@@ -95,6 +95,16 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
     save_times, and saves all of the variables in list save_var_names. Elements in
     save_var_names can be set to None if you do not want to save them
     """
+    profiling_data_sim_runner = { 'start': {}, 'end': {} }
+    profiling_data_sim_runner["start"]["t_sim_init"] = 0
+    profiling_data_sim_runner["end"]["t_sim_init"] = 0
+    profiling_data_sim_runner["start"]["t_nc_write"] = 0
+    profiling_data_sim_runner["end"]["t_nc_write"] = 0
+    profiling_data_sim_runner["start"]["t_step"] = 0
+    profiling_data_sim_runner["end"]["t_step"] = 0
+
+    profiling_data_sim_runner["start"]["t_sim_init"] = time.time()
+
     logger = logging.getLogger(__name__)
 
     assert len(save_times) > 0, "Need to specify which times to save"
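
The hunk above introduces profiling_data_sim_runner, a two-level dict that accumulates start and end timestamps per stage. A minimal sketch of how this structure yields a per-stage duration (the elapsed() helper is hypothetical, not part of the commit):

    import time

    # Structure as introduced in the diff: start/end timestamps per stage.
    profiling_data_sim_runner = {'start': {}, 'end': {}}
    profiling_data_sim_runner["start"]["t_sim_init"] = 0
    profiling_data_sim_runner["end"]["t_sim_init"] = 0

    profiling_data_sim_runner["start"]["t_sim_init"] = time.time()
    # ... simulator initialization would happen here ...
    profiling_data_sim_runner["end"]["t_sim_init"] = time.time()

    # Hypothetical helper: a stage's duration is the difference of the
    # accumulated end and start values.
    def elapsed(profiling_data, stage):
        return profiling_data["end"][stage] - profiling_data["start"][stage]

    print(elapsed(profiling_data_sim_runner, "t_sim_init"))
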
@@ -146,6 +156,8 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
     t_steps[0] = save_times[0]
     t_steps[1:] = save_times[1:] - save_times[0:-1]
 
+    profiling_data_sim_runner["end"]["t_sim_init"] = time.time()
+
     #Start simulation loop
     progress_printer = ProgressPrinter(save_times[-1], print_every=10)
     for k in range(len(save_times)):
@@ -160,18 +172,24 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
             logger.error("Error after {:d} steps (t={:f}: {:s}".format(sim.simSteps(), sim.simTime(), str(e)))
             return outdata.filename
 
+        profiling_data_sim_runner["start"]["t_step"] += time.time()
+
         #Simulate
         if (t_step > 0.0):
             sim.simulate(t_step)
 
+        profiling_data_sim_runner["end"]["t_step"] += time.time()
+
+        profiling_data_sim_runner["start"]["t_nc_write"] += time.time()
+
         #Download
-        """
         save_vars = sim.download(download_vars)
 
         #Save to file
         for i, var_name in enumerate(save_var_names):
             ncvars[var_name][k, :] = save_vars[i]
-        """
+
+        profiling_data_sim_runner["end"]["t_nc_write"] += time.time()
 
         #Write progress to screen
         print_string = progress_printer.getPrintString(t_end)
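
Note that the counters are accumulated with += on every pass through the save loop, so end minus start equals the sum of the individual intervals: sum_i(end_i) - sum_i(start_i) = sum_i(end_i - start_i). A self-contained illustration of the pattern (time.sleep stands in for sim.simulate):

    import time

    p = {'start': {'t_step': 0.0}, 'end': {'t_step': 0.0}}

    for _ in range(5):
        p["start"]["t_step"] += time.time()   # accumulate start timestamps
        time.sleep(0.01)                      # stand-in for sim.simulate(t_step)
        p["end"]["t_step"] += time.time()     # accumulate end timestamps

    # Sum of (end_i - start_i) == total time spent in the timed region.
    print(p["end"]["t_step"] - p["start"]["t_step"])  # roughly 0.05 s
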
@@ -180,7 +198,7 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
 
     logger.debug("Simulated to t={:f} in {:d} timesteps (average dt={:f})".format(t_end, sim.simSteps(), sim.simTime() / sim.simSteps()))
 
-    return outdata.filename, sim.profiling_data
+    return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi
 
@@ -201,13 +201,13 @@ class MPISimulator(Simulator.BaseSimulator):
     Class which handles communication between simulators on different MPI nodes
     """
    def __init__(self, sim, grid):
-        self.profiling_data = { 'start': {}, 'end': {} }
-        self.profiling_data["start"]["t_halo_exchange"] = 0
-        self.profiling_data["end"]["t_halo_exchange"] = 0
-        self.profiling_data["start"]["t_step"] = 0
-        self.profiling_data["end"]["t_step"] = 0
-        self.profiling_data["n_time_steps"] = 0
-        self.profiling_data["start"]["t_mpi_sim_init"] = time.time()
+        self.profiling_data_mpi = { 'start': {}, 'end': {} }
+        self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange"] = 0
+        self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange"] = 0
+        self.profiling_data_mpi["start"]["t_step_mpi"] = 0
+        self.profiling_data_mpi["end"]["t_step_mpi"] = 0
+        self.profiling_data_mpi["n_time_steps"] = 0
+        self.profiling_data_mpi["start"]["t_sim_mpi_init"] = time.time()
         self.logger = logging.getLogger(__name__)
 
         autotuner = sim.context.autotuner
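
The rename from profiling_data to profiling_data_mpi, with more specific stage keys (t_step_mpi, t_step_mpi_halo_exchange, t_sim_mpi_init), plausibly avoids key collisions once the runner's and the MPI simulator's dicts are flattened into one profiling_data mapping (see the merge loops further down): both formerly used a t_step key. A toy demonstration of the collision this avoids (names hypothetical):

    # Before the rename, both dicts used a "t_step" stage, so merging them
    # into one flat mapping would silently overwrite one measurement.
    runner = {"t_step": 2.5, "t_sim_init": 0.1}
    mpi_old = {"t_step": 1.9}                 # old key: collides
    mpi_new = {"t_step_mpi": 1.9}             # renamed key: no collision

    print({**runner, **mpi_old})  # runner's t_step is lost
    print({**runner, **mpi_new})  # both measurements preserved
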
@@ -292,17 +292,25 @@ class MPISimulator(Simulator.BaseSimulator):
         self.out_s = np.empty_like(self.in_s)
 
         self.logger.debug("Simlator rank {:d} initialized on {:s}".format(self.grid.comm.rank, MPI.Get_processor_name()))
-        self.profiling_data["end"]["t_mpi_sim_init"] = time.time()
+        self.profiling_data_mpi["end"]["t_sim_mpi_init"] = time.time()
 
     def substep(self, dt, step_number):
-        self.profiling_data["start"]["t_halo_exchange"] += time.time()
-        self.exchange()
-        self.profiling_data["end"]["t_halo_exchange"] += time.time()
+        if self.profiling_data_mpi["n_time_steps"] > 0:
+            self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange"] += time.time()
+
+        self.exchange()
+
+        self.sim.stream.synchronize() # only necessary for profiling!
+        if self.profiling_data_mpi["n_time_steps"] > 0:
+            self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange"] += time.time()
+            self.profiling_data_mpi["start"]["t_step_mpi"] += time.time()
 
-        self.profiling_data["start"]["t_step"] += time.time()
         self.sim.substep(dt, step_number)
-        self.profiling_data["end"]["t_step"] += time.time()
-        self.profiling_data["n_time_steps"] += 1
+
+        self.sim.stream.synchronize() # only necessary for profiling!
+        if self.profiling_data_mpi["n_time_steps"] > 0:
+            self.profiling_data_mpi["end"]["t_step_mpi"] += time.time()
+        self.profiling_data_mpi["n_time_steps"] += 1
 
     def getOutput(self):
         return self.sim.getOutput()
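
Two details of the rewritten substep are worth noting: GPU work is launched asynchronously, so self.sim.stream.synchronize() is needed before reading the host clock, and timing is skipped while n_time_steps == 0, keeping the first step's one-off costs (e.g. kernel compilation and warm-up) out of the totals. A framework-free sketch of the same skip-first-step idea, under the assumption that excluding warm-up is what the guard is for (class and names hypothetical):

    import time

    class TimedStepper:
        """Minimal sketch (not the repo's class): time steps, skip the first."""
        def __init__(self):
            self.t_total = 0.0
            self.n_time_steps = 0

        def substep(self):
            measure = self.n_time_steps > 0   # skip the warm-up step
            if measure:
                t0 = time.time()
            self._do_step()                   # stand-in for sim.substep(dt, step_number)
            # A real GPU version would synchronize the stream here before timing.
            if measure:
                self.t_total += time.time() - t0
            self.n_time_steps += 1

        def _do_step(self):
            time.sleep(0.005)

    stepper = TimedStepper()
    for _ in range(4):
        stepper.substep()
    print(stepper.t_total, "s over", stepper.n_time_steps - 1, "measured steps")
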
@@ -423,5 +431,3 @@ class MPISimulator(Simulator.BaseSimulator):
         for comm in comm_send:
             comm.wait()
 
-
-
@@ -111,7 +111,7 @@ nx = args.nx
 ny = args.ny
 
 gamma = 1.4
-save_times = np.linspace(0, 0.1, 2)
+save_times = np.linspace(0, 0.5, 2)
 outfile = "mpi_out_" + str(MPI.COMM_WORLD.rank) + ".nc"
 save_var_names = ['rho', 'rho_u', 'rho_v', 'E']
 
@@ -138,7 +138,7 @@ def genSim(grid, **kwargs):
     return sim
 
 
-outfile, sim_profiling_data = Common.runSimulation(
+outfile, sim_runner_profiling_data, sim_profiling_data = Common.runSimulation(
     genSim, arguments, outfile, save_times, save_var_names)
 
 if(args.profile):
@@ -159,6 +159,9 @@ if(args.profile and MPI.COMM_WORLD.rank == 0):
     else:
        profiling_file = "MPI_" + str(MPI.COMM_WORLD.size) + "_procs_and_" + str(num_cuda_devices) + "_GPUs_profiling.json"
 
+    for stage in sim_runner_profiling_data["start"].keys():
+        profiling_data[stage] = sim_runner_profiling_data["end"][stage] - sim_runner_profiling_data["start"][stage]
+
     for stage in sim_profiling_data["start"].keys():
         profiling_data[stage] = sim_profiling_data["end"][stage] - sim_profiling_data["start"][stage]
 
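
Each loop collapses a {'start': ..., 'end': ...} dict into per-stage durations in the flat profiling_data mapping that is then written to profiling_file. A minimal sketch of that flattening (flatten_durations and the sample values are hypothetical):

    import json

    def flatten_durations(*sources):
        """Collapse {'start': {...}, 'end': {...}} dicts into stage -> seconds."""
        out = {}
        for src in sources:
            for stage in src["start"].keys():
                out[stage] = src["end"][stage] - src["start"][stage]
        return out

    sim_runner_profiling_data = {'start': {'t_step': 1.0}, 'end': {'t_step': 3.5}}
    sim_profiling_data = {'start': {'t_step_mpi': 2.0}, 'end': {'t_step_mpi': 4.0}}

    profiling_data = flatten_durations(sim_runner_profiling_data, sim_profiling_data)
    print(json.dumps(profiling_data))  # {"t_step": 2.5, "t_step_mpi": 2.0}
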
@@ -6,7 +6,7 @@
 #SBATCH --account=nn9882k
 #
 # Wall clock limit:
-#SBATCH --time=00:20:00
+#SBATCH --time=00:02:00
 #
 # NOTE: See https://documentation.sigma2.no/jobs/projects_accounting.html when adjusting the values below
 #
@@ -28,7 +28,7 @@
 # source /cluster/bin/jobsetup
 
 module restore system # instead of 'module purge' rather set module environment to the system default
-module load CUDA/10.2.89
+module load CUDA/11.4.1
 
 # It is also recommended to to list loaded modules, for easier debugging:
 module list
@@ -41,7 +41,7 @@ mkdir $SCRATCH/ShallowWaterGPU
 cp -r . $SCRATCH/ShallowWaterGPU
 
 ## Make sure the results are copied back to the submit directory (see Work Directory below):
-# chkfile MyResultFile
+# chkfile MyResultFileq
 # chkfile is replaced by 'savefile' on Saga
 savefile "$SCRATCH/ShallowWaterGPU/*.log"
 savefile "$SCRATCH/ShallowWaterGPU/*.nc"
@@ -6,26 +6,26 @@
 #SBATCH --account=nn9882k
 #
 # Wall clock limit:
-#SBATCH --time=10:00:00
+#SBATCH --time=00:10:00
+#
+# NOTE: See https://documentation.sigma2.no/jobs/projects_accounting.html when adjusting the values below
 #
-# Ask for 1 GPU (max is 2)
 # Note: The environment variable CUDA_VISIBLE_DEVICES will show which GPU
 # device(s) to use. It will have values '0', '1' or '0,1' corresponding to
 # /dev/nvidia0, /dev/nvidia1 or both, respectively.
-#SBATCH --partition=accel --gres=gpu:1
+#SBATCH --partition=accel
 #
 # Max memory usage per task (core) - increasing this will cost more core hours:
-#SBATCH --mem-per-cpu=16G
+#SBATCH --mem-per-cpu=3800M
 #
-# Number of tasks:
-#SBATCH --nodes=1 --ntasks-per-node=1
+#SBATCH --qos=devel
 
 ## Set up job environment: (this is done automatically behind the scenes)
 ## (make sure to comment '#' or remove the following line 'source ...')
 # source /cluster/bin/jobsetup
 
 module restore system # instead of 'module purge' rather set module environment to the system default
-module load CUDA/10.2.89
+module load CUDA/11.4.1
 
 # It is also recommended to to list loaded modules, for easier debugging:
 module list
@@ -47,5 +47,5 @@ savefile "$SCRATCH/ShallowWaterGPU/*.json"
 ## Do some work:
 cd $SCRATCH/ShallowWaterGPU
 srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 --version
-srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx 1024 -ny 1024 --profile
+srun $HOME/.conda/envs/ShallowWaterGPU_HPC/bin/python3 mpiTesting.py -nx $NX -ny $NY --profile
 
@@ -1,13 +1,13 @@
 #!/bin/bash
 
-# one node: 1-4 tasks/GPUs
-sbatch --partition=accel --gres=gpu:1 --nodes=1 --ntasks-per-node=1 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:2 --nodes=1 --ntasks-per-node=2 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:3 --nodes=1 --ntasks-per-node=3 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:4 --nodes=1 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
+# one node: 1-4 GPUs
+sbatch --nodes=1 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=1024,NY=1024 saga_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=2 --ntasks-per-node=2 --export=ALL,NX=1024,NY=512 saga_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=3 --ntasks-per-node=3 --export=ALL,NX=1024,NY=341 saga_strong_scaling_benchmark.job
+sbatch --nodes=1 --gpus-per-node=4 --ntasks-per-node=4 --export=ALL,NX=512,NY=512 saga_strong_scaling_benchmark.job
 
-# 2-4 nodes: 4 tasks/GPUs per node
-sbatch --partition=accel --gres=gpu:4 --nodes=2 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:4 --nodes=3 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
-sbatch --partition=accel --gres=gpu:4 --nodes=4 --ntasks-per-node=4 saga_strong_scaling_benchmark.job
+# 2-4 nodes: 1 GPUs per node
+sbatch --nodes=2 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=1024,NY=512 saga_strong_scaling_benchmark.job
+sbatch --nodes=3 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=1024,NY=341 saga_strong_scaling_benchmark.job
+sbatch --nodes=4 --gpus-per-node=1 --ntasks-per-node=1 --export=ALL,NX=512,NY=512 saga_strong_scaling_benchmark.job
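
The NX/NY pairs appear chosen so that the global workload stays near the 1024x1024 baseline as GPUs are added, consistent with the strong-scaling intent suggested by the job name (reading NX/NY as per-rank subdomain sizes is an assumption, not confirmed by the diff). A quick arithmetic check:

    # Assumed interpretation: NX*NY is the per-rank subdomain, chosen so that
    # n_gpus * NX * NY stays close to the 1024*1024 single-GPU baseline.
    baseline = 1024 * 1024
    for n_gpus, (nx, ny) in {1: (1024, 1024), 2: (1024, 512),
                             3: (1024, 341), 4: (512, 512)}.items():
        total = n_gpus * nx * ny
        print(n_gpus, total, f"{total / baseline:.3f}")
    # 1 -> 1.000, 2 -> 1.000, 3 -> 0.999, 4 -> 1.000
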