mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 06:24:13 +02:00
Bugfix for ghost cell exchange and nc atts
This commit is contained in:
parent
4d03e6cfbc
commit
a588948e77
@ -100,8 +100,8 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
|
|||||||
profiling_data_sim_runner["end"]["t_sim_init"] = 0
|
profiling_data_sim_runner["end"]["t_sim_init"] = 0
|
||||||
profiling_data_sim_runner["start"]["t_nc_write"] = 0
|
profiling_data_sim_runner["start"]["t_nc_write"] = 0
|
||||||
profiling_data_sim_runner["end"]["t_nc_write"] = 0
|
profiling_data_sim_runner["end"]["t_nc_write"] = 0
|
||||||
profiling_data_sim_runner["start"]["t_step"] = 0
|
profiling_data_sim_runner["start"]["t_full_step"] = 0
|
||||||
profiling_data_sim_runner["end"]["t_step"] = 0
|
profiling_data_sim_runner["end"]["t_full_step"] = 0
|
||||||
|
|
||||||
profiling_data_sim_runner["start"]["t_sim_init"] = time.time()
|
profiling_data_sim_runner["start"]["t_sim_init"] = time.time()
|
||||||
|
|
||||||
@ -121,7 +121,14 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
|
|||||||
outdata.ncfile.git_hash = getGitHash()
|
outdata.ncfile.git_hash = getGitHash()
|
||||||
outdata.ncfile.git_status = getGitStatus()
|
outdata.ncfile.git_status = getGitStatus()
|
||||||
outdata.ncfile.simulator = str(simulator)
|
outdata.ncfile.simulator = str(simulator)
|
||||||
outdata.ncfile.sim_args = toJson(simulator_args)
|
|
||||||
|
# do not write fields to attributes (they are too large)
|
||||||
|
simulator_args_for_ncfile = simulator_args.copy()
|
||||||
|
del simulator_args_for_ncfile["rho"]
|
||||||
|
del simulator_args_for_ncfile["rho_u"]
|
||||||
|
del simulator_args_for_ncfile["rho_v"]
|
||||||
|
del simulator_args_for_ncfile["E"]
|
||||||
|
outdata.ncfile.sim_args = toJson(simulator_args_for_ncfile)
|
||||||
|
|
||||||
#Create dimensions
|
#Create dimensions
|
||||||
outdata.ncfile.createDimension('time', len(save_times))
|
outdata.ncfile.createDimension('time', len(save_times))
|
||||||
@ -172,13 +179,13 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
|
|||||||
logger.error("Error after {:d} steps (t={:f}: {:s}".format(sim.simSteps(), sim.simTime(), str(e)))
|
logger.error("Error after {:d} steps (t={:f}: {:s}".format(sim.simSteps(), sim.simTime(), str(e)))
|
||||||
return outdata.filename
|
return outdata.filename
|
||||||
|
|
||||||
profiling_data_sim_runner["start"]["t_step"] += time.time()
|
profiling_data_sim_runner["start"]["t_full_step"] += time.time()
|
||||||
|
|
||||||
#Simulate
|
#Simulate
|
||||||
if (t_step > 0.0):
|
if (t_step > 0.0):
|
||||||
sim.simulate(t_step, dt)
|
sim.simulate(t_step, dt)
|
||||||
|
|
||||||
profiling_data_sim_runner["end"]["t_step"] += time.time()
|
profiling_data_sim_runner["end"]["t_full_step"] += time.time()
|
||||||
|
|
||||||
profiling_data_sim_runner["start"]["t_nc_write"] += time.time()
|
profiling_data_sim_runner["start"]["t_nc_write"] += time.time()
|
||||||
|
|
||||||
|
@ -138,9 +138,9 @@ class EE2D_KP07_dimsplit (BaseSimulator):
|
|||||||
return
|
return
|
||||||
|
|
||||||
if external and not internal:
|
if external and not internal:
|
||||||
#############################################################
|
###################################
|
||||||
# XXX: Only treating north and south external cells for now #
|
# XXX: Corners are treated twice! #
|
||||||
#############################################################
|
###################################
|
||||||
|
|
||||||
ns_grid_size = (self.grid_size[0], 1)
|
ns_grid_size = (self.grid_size[0], 1)
|
||||||
|
|
||||||
@ -189,13 +189,57 @@ class EE2D_KP07_dimsplit (BaseSimulator):
|
|||||||
self.cfl_data.gpudata,
|
self.cfl_data.gpudata,
|
||||||
0, 0,
|
0, 0,
|
||||||
self.nx, int(self.u0[0].y_halo))
|
self.nx, int(self.u0[0].y_halo))
|
||||||
|
|
||||||
|
we_grid_size = (1, self.grid_size[1])
|
||||||
|
|
||||||
|
# WEST
|
||||||
|
# (x0, y0) x (x1, y1)
|
||||||
|
# (0, 0) x (x_halo, ny)
|
||||||
|
self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream,
|
||||||
|
self.nx, self.ny,
|
||||||
|
self.dx, self.dy, dt,
|
||||||
|
self.g,
|
||||||
|
self.gamma,
|
||||||
|
self.theta,
|
||||||
|
substep,
|
||||||
|
self.boundary_conditions,
|
||||||
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||||
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||||
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||||
|
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||||
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
|
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||||
|
self.cfl_data.gpudata,
|
||||||
|
0, 0,
|
||||||
|
int(self.u0[0].x_halo), self.ny)
|
||||||
|
|
||||||
|
# EAST
|
||||||
|
# (x0, y0) x (x1, y1)
|
||||||
|
# (nx-x_halo, 0) x (nx, ny)
|
||||||
|
self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream,
|
||||||
|
self.nx, self.ny,
|
||||||
|
self.dx, self.dy, dt,
|
||||||
|
self.g,
|
||||||
|
self.gamma,
|
||||||
|
self.theta,
|
||||||
|
substep,
|
||||||
|
self.boundary_conditions,
|
||||||
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||||
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||||
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||||
|
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||||
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
|
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||||
|
self.cfl_data.gpudata,
|
||||||
|
self.nx - int(self.u0[0].x_halo), 0,
|
||||||
|
self.nx, self.ny)
|
||||||
return
|
return
|
||||||
|
|
||||||
if internal and not external:
|
if internal and not external:
|
||||||
#############################################################
|
|
||||||
# XXX: Only treating north and south external cells for now #
|
|
||||||
# So we need to include west and east boundary here! #
|
|
||||||
#############################################################
|
|
||||||
|
|
||||||
# INTERNAL DOMAIN
|
# INTERNAL DOMAIN
|
||||||
# (x0, y0) x (x1, y1)
|
# (x0, y0) x (x1, y1)
|
||||||
@ -217,8 +261,8 @@ class EE2D_KP07_dimsplit (BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||||
self.cfl_data.gpudata,
|
self.cfl_data.gpudata,
|
||||||
0, int(self.u0[0].y_halo),
|
int(self.u0[0].x_halo), int(self.u0[0].y_halo),
|
||||||
self.nx, self.ny - int(self.u0[0].y_halo))
|
self.nx - int(self.u0[0].x_halo), self.ny - int(self.u0[0].y_halo))
|
||||||
return
|
return
|
||||||
|
|
||||||
def swapBuffers(self):
|
def swapBuffers(self):
|
||||||
|
@ -208,18 +208,17 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
"""
|
"""
|
||||||
def __init__(self, sim, grid):
|
def __init__(self, sim, grid):
|
||||||
self.profiling_data_mpi = { 'start': {}, 'end': {} }
|
self.profiling_data_mpi = { 'start': {}, 'end': {} }
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange"] = 0
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange"] = 0
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange"] = 0
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange"] = 0
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_download"] = 0
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_download"] = 0
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_download"] = 0
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] = 0
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_upload"] = 0
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_upload"] = 0
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_upload"] = 0
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_upload"] = 0
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_sendreceive"] = 0
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_sendreceive"] = 0
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_sendreceive"] = 0
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_sendreceive"] = 0
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi"] = 0
|
self.profiling_data_mpi["start"]["t_mpi_step"] = 0
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi"] = 0
|
self.profiling_data_mpi["end"]["t_mpi_step"] = 0
|
||||||
self.profiling_data_mpi["n_time_steps"] = 0
|
self.profiling_data_mpi["n_time_steps"] = 0
|
||||||
self.profiling_data_mpi["start"]["t_sim_mpi_init"] = time.time()
|
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
autotuner = sim.context.autotuner
|
autotuner = sim.context.autotuner
|
||||||
@ -297,43 +296,43 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
#Note that east and west also transfer ghost cells
|
#Note that east and west also transfer ghost cells
|
||||||
#whilst north/south only transfer internal cells
|
#whilst north/south only transfer internal cells
|
||||||
#Reuses the width/height defined in the read-extents above
|
#Reuses the width/height defined in the read-extents above
|
||||||
self.in_e = cuda.pagelocked_empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) #np.empty((self.nvars, self.read_e[3], self.read_e[2]), dtype=np.float32)
|
self.in_e = cuda.pagelocked_empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])), dtype=np.float32) #np.empty((self.nvars, self.read_e[3], self.read_e[2]), dtype=np.float32)
|
||||||
self.in_w = cuda.pagelocked_empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) #np.empty((self.nvars, self.read_w[3], self.read_w[2]), dtype=np.float32)
|
self.in_w = cuda.pagelocked_empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])), dtype=np.float32) #np.empty((self.nvars, self.read_w[3], self.read_w[2]), dtype=np.float32)
|
||||||
self.in_n = cuda.pagelocked_empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) #np.empty((self.nvars, self.read_n[3], self.read_n[2]), dtype=np.float32)
|
self.in_n = cuda.pagelocked_empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])), dtype=np.float32) #np.empty((self.nvars, self.read_n[3], self.read_n[2]), dtype=np.float32)
|
||||||
self.in_s = cuda.pagelocked_empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) #np.empty((self.nvars, self.read_s[3], self.read_s[2]), dtype=np.float32)
|
self.in_s = cuda.pagelocked_empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])), dtype=np.float32) #np.empty((self.nvars, self.read_s[3], self.read_s[2]), dtype=np.float32)
|
||||||
|
|
||||||
#Allocate data for sending
|
#Allocate data for sending
|
||||||
self.out_e = cuda.pagelocked_empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) #np.empty_like(self.in_e)
|
self.out_e = cuda.pagelocked_empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])), dtype=np.float32) #np.empty_like(self.in_e)
|
||||||
self.out_w = cuda.pagelocked_empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) #np.empty_like(self.in_w)
|
self.out_w = cuda.pagelocked_empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])), dtype=np.float32) #np.empty_like(self.in_w)
|
||||||
self.out_n = cuda.pagelocked_empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) #np.empty_like(self.in_n)
|
self.out_n = cuda.pagelocked_empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])), dtype=np.float32) #np.empty_like(self.in_n)
|
||||||
self.out_s = cuda.pagelocked_empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) #np.empty_like(self.in_s)
|
self.out_s = cuda.pagelocked_empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])), dtype=np.float32) #np.empty_like(self.in_s)
|
||||||
|
|
||||||
self.logger.debug("Simlator rank {:d} initialized on {:s}".format(self.grid.comm.rank, MPI.Get_processor_name()))
|
self.logger.debug("Simlator rank {:d} initialized on {:s}".format(self.grid.comm.rank, MPI.Get_processor_name()))
|
||||||
self.profiling_data_mpi["end"]["t_sim_mpi_init"] = time.time()
|
|
||||||
|
|
||||||
self.old_exchange()
|
self.full_exchange()
|
||||||
|
sim.context.synchronize()
|
||||||
|
|
||||||
def substep(self, dt, step_number):
|
def substep(self, dt, step_number):
|
||||||
|
|
||||||
nvtx.mark("substep start", color="yellow")
|
nvtx.mark("substep start", color="yellow")
|
||||||
|
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi"] += time.time()
|
self.profiling_data_mpi["start"]["t_mpi_step"] += time.time()
|
||||||
|
|
||||||
nvtx.mark("substep internal", color="red")
|
|
||||||
self.sim.substep(dt, step_number, internal=True, external=False) # "internal ghost cells" excluded
|
|
||||||
|
|
||||||
nvtx.mark("substep external", color="blue")
|
nvtx.mark("substep external", color="blue")
|
||||||
self.sim.substep(dt, step_number, external=True, internal=False) # only "internal ghost cells"
|
self.sim.substep(dt, step_number, external=True, internal=False) # only "internal ghost cells"
|
||||||
|
|
||||||
|
nvtx.mark("substep internal", color="red")
|
||||||
|
self.sim.substep(dt, step_number, internal=True, external=False) # "internal ghost cells" excluded
|
||||||
|
|
||||||
#nvtx.mark("substep full", color="blue")
|
#nvtx.mark("substep full", color="blue")
|
||||||
#self.sim.substep(dt, step_number, external=True, internal=True)
|
#self.sim.substep(dt, step_number, external=True, internal=True)
|
||||||
|
|
||||||
self.sim.swapBuffers()
|
self.sim.swapBuffers()
|
||||||
|
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi"] += time.time()
|
self.profiling_data_mpi["end"]["t_mpi_step"] += time.time()
|
||||||
|
|
||||||
nvtx.mark("exchange", color="blue")
|
nvtx.mark("exchange", color="blue")
|
||||||
self.old_exchange()
|
self.full_exchange()
|
||||||
|
|
||||||
#nvtx.mark("download", color="blue")
|
#nvtx.mark("download", color="blue")
|
||||||
#self.download_for_exchange(self.sim.u0)
|
#self.download_for_exchange(self.sim.u0)
|
||||||
@ -383,8 +382,7 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
return [x0, x1, y0, y1]
|
return [x0, x1, y0, y1]
|
||||||
|
|
||||||
def download_for_exchange(self, u):
|
def download_for_exchange(self, u):
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_download"] += time.time()
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_download"] += time.time()
|
|
||||||
|
|
||||||
# North-south
|
# North-south
|
||||||
if self.north is not None:
|
if self.north is not None:
|
||||||
@ -406,12 +404,10 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
u[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w)
|
u[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w)
|
||||||
#self.out_w[k,:,:] = u[k].download(self.sim.stream, asynch=True, extent=self.read_w)
|
#self.out_w[k,:,:] = u[k].download(self.sim.stream, asynch=True, extent=self.read_w)
|
||||||
|
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_download"] += time.time()
|
|
||||||
|
|
||||||
def exchange(self):
|
def exchange(self):
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
|
|
||||||
#Send/receive to north/south neighbours
|
#Send/receive to north/south neighbours
|
||||||
comm_send = []
|
comm_send = []
|
||||||
@ -441,12 +437,10 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
for comm in comm_send:
|
for comm in comm_send:
|
||||||
comm.wait()
|
comm.wait()
|
||||||
|
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
|
|
||||||
def upload_for_exchange(self, u):
|
def upload_for_exchange(self, u):
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_upload"] += time.time()
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_upload"] += time.time()
|
|
||||||
|
|
||||||
# North-south
|
# North-south
|
||||||
if self.north is not None:
|
if self.north is not None:
|
||||||
@ -464,15 +458,11 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
u[k].upload(self.sim.stream, self.in_w[k,:,:], extent=self.write_w)
|
u[k].upload(self.sim.stream, self.in_w[k,:,:], extent=self.write_w)
|
||||||
|
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_upload"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_upload"] += time.time()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def full_exchange(self):
|
||||||
|
|
||||||
|
|
||||||
def old_exchange(self):
|
|
||||||
####
|
####
|
||||||
# FIXME: This function can be optimized using persistent communications.
|
# FIXME: This function can be optimized using persistent communications.
|
||||||
# Also by overlapping some of the communications north/south and east/west of GPU and intra-node
|
# Also by overlapping some of the communications north/south and east/west of GPU and intra-node
|
||||||
@ -484,8 +474,7 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
####
|
####
|
||||||
|
|
||||||
#Download from the GPU
|
#Download from the GPU
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_download"] += time.time()
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_download"] += time.time()
|
|
||||||
|
|
||||||
if self.north is not None:
|
if self.north is not None:
|
||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
@ -495,10 +484,10 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], asynch=True, extent=self.read_s)
|
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], asynch=True, extent=self.read_s)
|
||||||
self.sim.stream.synchronize()
|
self.sim.stream.synchronize()
|
||||||
|
|
||||||
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
|
||||||
|
|
||||||
#Send/receive to north/south neighbours
|
#Send/receive to north/south neighbours
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_download"] += time.time()
|
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
|
|
||||||
comm_send = []
|
comm_send = []
|
||||||
comm_recv = []
|
comm_recv = []
|
||||||
@ -513,10 +502,10 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
for comm in comm_recv:
|
for comm in comm_recv:
|
||||||
comm.wait()
|
comm.wait()
|
||||||
|
|
||||||
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
|
|
||||||
#Upload to the GPU
|
#Upload to the GPU
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_upload"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_upload"] += time.time()
|
|
||||||
|
|
||||||
if self.north is not None:
|
if self.north is not None:
|
||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
@ -525,24 +514,22 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
self.sim.u0[k].upload(self.sim.stream, self.in_s[k,:,:], extent=self.write_s)
|
self.sim.u0[k].upload(self.sim.stream, self.in_s[k,:,:], extent=self.write_s)
|
||||||
|
|
||||||
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_upload"] += time.time()
|
||||||
|
|
||||||
#Wait for sending to complete
|
#Wait for sending to complete
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_upload"] += time.time()
|
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
|
|
||||||
for comm in comm_send:
|
for comm in comm_send:
|
||||||
comm.wait()
|
comm.wait()
|
||||||
|
|
||||||
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
|
|
||||||
####
|
####
|
||||||
# Then transfer east-west including ghost cells that have been filled in by north-south transfer above
|
# Then transfer east-west including ghost cells that have been filled in by north-south transfer above
|
||||||
####
|
####
|
||||||
|
|
||||||
#Download from the GPU
|
#Download from the GPU
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_download"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_download"] += time.time()
|
|
||||||
|
|
||||||
if self.east is not None:
|
if self.east is not None:
|
||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
@ -552,10 +539,10 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w)
|
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w)
|
||||||
self.sim.stream.synchronize()
|
self.sim.stream.synchronize()
|
||||||
|
|
||||||
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
|
||||||
|
|
||||||
#Send/receive to east/west neighbours
|
#Send/receive to east/west neighbours
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_download"] += time.time()
|
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
|
|
||||||
comm_send = []
|
comm_send = []
|
||||||
comm_recv = []
|
comm_recv = []
|
||||||
@ -566,15 +553,14 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
comm_send += [self.grid.comm.Isend(self.out_w, dest=self.west, tag=4*self.nt + 3)]
|
comm_send += [self.grid.comm.Isend(self.out_w, dest=self.west, tag=4*self.nt + 3)]
|
||||||
comm_recv += [self.grid.comm.Irecv(self.in_w, source=self.west, tag=4*self.nt + 2)]
|
comm_recv += [self.grid.comm.Irecv(self.in_w, source=self.west, tag=4*self.nt + 2)]
|
||||||
|
|
||||||
|
|
||||||
#Wait for incoming transfers to complete
|
#Wait for incoming transfers to complete
|
||||||
for comm in comm_recv:
|
for comm in comm_recv:
|
||||||
comm.wait()
|
comm.wait()
|
||||||
|
|
||||||
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
|
|
||||||
#Upload to the GPU
|
#Upload to the GPU
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_upload"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_upload"] += time.time()
|
|
||||||
|
|
||||||
if self.east is not None:
|
if self.east is not None:
|
||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
@ -583,13 +569,12 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
self.sim.u0[k].upload(self.sim.stream, self.in_w[k,:,:], extent=self.write_w)
|
self.sim.u0[k].upload(self.sim.stream, self.in_w[k,:,:], extent=self.write_w)
|
||||||
|
|
||||||
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_upload"] += time.time()
|
||||||
|
|
||||||
#Wait for sending to complete
|
#Wait for sending to complete
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["start"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_upload"] += time.time()
|
|
||||||
self.profiling_data_mpi["start"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
|
|
||||||
for comm in comm_send:
|
for comm in comm_send:
|
||||||
comm.wait()
|
comm.wait()
|
||||||
|
|
||||||
if self.profiling_data_mpi["n_time_steps"] > 0:
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_sendreceive"] += time.time()
|
||||||
self.profiling_data_mpi["end"]["t_step_mpi_halo_exchange_sendreceive"] += time.time()
|
|
||||||
|
@ -183,6 +183,8 @@ if(args.profile and MPI.COMM_WORLD.rank == 0):
|
|||||||
profiling_data["slurm_job_id"] = job_id
|
profiling_data["slurm_job_id"] = job_id
|
||||||
profiling_data["n_cuda_devices"] = str(num_cuda_devices)
|
profiling_data["n_cuda_devices"] = str(num_cuda_devices)
|
||||||
profiling_data["n_processes"] = str(MPI.COMM_WORLD.size)
|
profiling_data["n_processes"] = str(MPI.COMM_WORLD.size)
|
||||||
|
profiling_data["git_hash"] = Common.getGitHash()
|
||||||
|
profiling_data["git_status"] = Common.getGitStatus()
|
||||||
|
|
||||||
with open(profiling_file, "w") as write_file:
|
with open(profiling_file, "w") as write_file:
|
||||||
json.dump(profiling_data, write_file)
|
json.dump(profiling_data, write_file)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user