diff --git a/GPUSimulators/EE2D_KP07_dimsplit.py b/GPUSimulators/EE2D_KP07_dimsplit.py index 47d3235..8d6decf 100644 --- a/GPUSimulators/EE2D_KP07_dimsplit.py +++ b/GPUSimulators/EE2D_KP07_dimsplit.py @@ -70,13 +70,13 @@ class EE2D_KP07_dimsplit (BaseSimulator): super().__init__(context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, 2, block_width, block_height) self.g = np.float32(g) self.gamma = np.float32(gamma) self.theta = np.float32(theta) - self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels module = context.get_module("cuda/EE2D_KP07_dimsplit.cu", diff --git a/GPUSimulators/FORCE.py b/GPUSimulators/FORCE.py index 35d593e..2e60be6 100644 --- a/GPUSimulators/FORCE.py +++ b/GPUSimulators/FORCE.py @@ -67,11 +67,11 @@ class FORCE (Simulator.BaseSimulator): super().__init__(context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, 1, block_width, block_height) self.g = np.float32(g) - self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels module = context.get_module("cuda/SWE2D_FORCE.cu", diff --git a/GPUSimulators/HLL.py b/GPUSimulators/HLL.py index 41968c5..9911f9b 100644 --- a/GPUSimulators/HLL.py +++ b/GPUSimulators/HLL.py @@ -62,11 +62,11 @@ class HLL (Simulator.BaseSimulator): super().__init__(context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, 1, block_width, block_height); self.g = np.float32(g) - self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels module = context.get_module("cuda/SWE2D_HLL.cu", diff --git a/GPUSimulators/HLL2.py b/GPUSimulators/HLL2.py index ddecba6..64fa765 100644 --- a/GPUSimulators/HLL2.py +++ b/GPUSimulators/HLL2.py @@ -65,13 +65,12 @@ class HLL2 (Simulator.BaseSimulator): super().__init__(context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, 2, block_width, block_height); self.g = np.float32(g) self.theta = np.float32(theta) - self.cfl_scale = cfl_scale - self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels module = context.get_module("cuda/SWE2D_HLL2.cu", diff --git a/GPUSimulators/KP07.py b/GPUSimulators/KP07.py index e91ba6d..8e2af50 100644 --- a/GPUSimulators/KP07.py +++ b/GPUSimulators/KP07.py @@ -67,13 +67,13 @@ class KP07 (Simulator.BaseSimulator): super().__init__(context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, order, block_width, block_height); self.g = np.float32(g) self.theta = np.float32(theta) self.order = np.int32(order) - self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels module = context.get_module("cuda/SWE2D_KP07.cu", diff --git a/GPUSimulators/KP07_dimsplit.py b/GPUSimulators/KP07_dimsplit.py index 6052c9a..c90cf07 100644 --- a/GPUSimulators/KP07_dimsplit.py +++ b/GPUSimulators/KP07_dimsplit.py @@ -67,6 +67,7 @@ class KP07_dimsplit(Simulator.BaseSimulator): super().__init__(context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, 2, block_width, block_height) @@ -74,7 +75,6 @@ class KP07_dimsplit(Simulator.BaseSimulator): self.gc_y = 2 self.g = np.float32(g) self.theta = np.float32(theta) - self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels module = context.get_module("cuda/SWE2D_KP07_dimsplit.cu", diff --git a/GPUSimulators/LxF.py b/GPUSimulators/LxF.py index 91a715a..48d9127 100644 --- a/GPUSimulators/LxF.py +++ b/GPUSimulators/LxF.py @@ -63,11 +63,11 @@ class LxF (Simulator.BaseSimulator): super().__init__(context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, 1, block_width, block_height); self.g = np.float32(g) - self.boundary_conditions = boundary_conditions.asCodedInt() # Get kernels module = context.get_module("cuda/SWE2D_LxF.cu", diff --git a/GPUSimulators/MPISimulator.py b/GPUSimulators/MPISimulator.py index 950fc24..fb0093c 100644 --- a/GPUSimulators/MPISimulator.py +++ b/GPUSimulators/MPISimulator.py @@ -154,9 +154,11 @@ class MPISimulator(Simulator.BaseSimulator): autotuner = sim.context.autotuner sim.context.autotuner = None; + boundary_conditions = sim.getBoundaryConditions() super().__init__(sim.context, sim.nx, sim.ny, sim.dx, sim.dy, + boundary_conditions, sim.cfl_scale, sim.num_substeps, sim.block_size[0], sim.block_size[1]) @@ -171,12 +173,35 @@ class MPISimulator(Simulator.BaseSimulator): self.north = grid.getNorth() self.south = grid.getSouth() + #Get coordinate of this node + #and handle global boundary conditions + new_boundary_conditions = Simulator.BoundaryCondition({ + 'north': Simulator.BoundaryCondition.Type.Dirichlet, + 'south': Simulator.BoundaryCondition.Type.Dirichlet, + 'east': Simulator.BoundaryCondition.Type.Dirichlet, + 'west': Simulator.BoundaryCondition.Type.Dirichlet + }) + gi, gj = grid.getCoordinate() + if (gi == 0 and boundary_conditions.west != Simulator.BoundaryCondition.Type.Periodic): + self.west = None + new_boundary_conditions.west = boundary_conditions.west; + if (gj == 0 and boundary_conditions.south != Simulator.BoundaryCondition.Type.Periodic): + self.south = None + new_boundary_conditions.south = boundary_conditions.south; + if (gi == grid.grid[0]-1 and boundary_conditions.east != Simulator.BoundaryCondition.Type.Periodic): + self.east = None + new_boundary_conditions.east = boundary_conditions.east; + if (gj == grid.grid[1]-1 and boundary_conditions.north != Simulator.BoundaryCondition.Type.Periodic): + self.north = None + new_boundary_conditions.north = boundary_conditions.north; + sim.setBoundaryConditions(new_boundary_conditions) + #Get number of variables - self.nvars = len(self.sim.u0.gpu_variables) + self.nvars = len(self.getOutput().gpu_variables) #Shorthands for computing extents and sizes - gc_x = int(self.sim.u0[0].x_halo) - gc_y = int(self.sim.u0[0].y_halo) + gc_x = int(self.sim.getOutput()[0].x_halo) + gc_y = int(self.sim.getOutput()[0].y_halo) nx = int(self.sim.nx) ny = int(self.sim.ny) @@ -208,6 +233,8 @@ class MPISimulator(Simulator.BaseSimulator): self.out_n = np.empty_like(self.in_n) self.out_s = np.empty_like(self.in_s) + self.logger.debug("Simlator rank {:d} has neighbors {:s}".format(self.grid.comm.rank, str([self.north, self.south, self.east, self.west]))) + self.logger.debug("Simlator rank {:d} initialized ".format(self.grid.comm.rank)) @@ -258,29 +285,35 @@ class MPISimulator(Simulator.BaseSimulator): #### #Download from the GPU - for k in range(self.nvars): - self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_n[k,:,:], async=True, extent=self.read_n) - self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], async=True, extent=self.read_s) + if self.north is not None: + for k in range(self.nvars): + self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_n[k,:,:], async=True, extent=self.read_n) + if self.south is not None: + for k in range(self.nvars): + self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], async=True, extent=self.read_s) self.sim.stream.synchronize() - #Send to north/south neighbours + #Send/receive to north/south neighbours comm_send = [] - comm_send += [self.grid.comm.Isend(self.out_n, dest=self.north, tag=4*self.nt + 0)] - comm_send += [self.grid.comm.Isend(self.out_s, dest=self.south, tag=4*self.nt + 1)] - - #Receive from north/south neighbors comm_recv = [] - comm_recv += [self.grid.comm.Irecv(self.in_s, source=self.south, tag=4*self.nt + 0)] - comm_recv += [self.grid.comm.Irecv(self.in_n, source=self.north, tag=4*self.nt + 1)] + if self.north is not None: + comm_send += [self.grid.comm.Isend(self.out_n, dest=self.north, tag=4*self.nt + 0)] + comm_recv += [self.grid.comm.Irecv(self.in_n, source=self.north, tag=4*self.nt + 1)] + if self.south is not None: + comm_send += [self.grid.comm.Isend(self.out_s, dest=self.south, tag=4*self.nt + 1)] + comm_recv += [self.grid.comm.Irecv(self.in_s, source=self.south, tag=4*self.nt + 0)] #Wait for incoming transfers to complete for comm in comm_recv: comm.wait() #Upload to the GPU - for k in range(self.nvars): - self.sim.u0[k].upload(self.sim.stream, self.in_n[k,:,:], extent=self.write_n) - self.sim.u0[k].upload(self.sim.stream, self.in_s[k,:,:], extent=self.write_s) + if self.north is not None: + for k in range(self.nvars): + self.sim.u0[k].upload(self.sim.stream, self.in_n[k,:,:], extent=self.write_n) + if self.south is not None: + for k in range(self.nvars): + self.sim.u0[k].upload(self.sim.stream, self.in_s[k,:,:], extent=self.write_s) #Wait for sending to complete for comm in comm_send: @@ -293,29 +326,36 @@ class MPISimulator(Simulator.BaseSimulator): #### #Download from the GPU - for k in range(self.nvars): - self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_e[k,:,:], async=True, extent=self.read_e) - self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], async=True, extent=self.read_w) + if self.east is not None: + for k in range(self.nvars): + self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_e[k,:,:], async=True, extent=self.read_e) + if self.west is not None: + for k in range(self.nvars): + self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], async=True, extent=self.read_w) self.sim.stream.synchronize() - #Send to east/west neighbours + #Send/receive to east/west neighbours comm_send = [] - comm_send += [self.grid.comm.Isend(self.out_e, dest=self.east, tag=4*self.nt + 2)] - comm_send += [self.grid.comm.Isend(self.out_w, dest=self.west, tag=4*self.nt + 3)] - - #Receive from east/west neighbors comm_recv = [] - comm_recv += [self.grid.comm.Irecv(self.in_w, source=self.west, tag=4*self.nt + 2)] - comm_recv += [self.grid.comm.Irecv(self.in_e, source=self.east, tag=4*self.nt + 3)] + if self.east is not None: + comm_send += [self.grid.comm.Isend(self.out_e, dest=self.east, tag=4*self.nt + 2)] + comm_recv += [self.grid.comm.Irecv(self.in_e, source=self.east, tag=4*self.nt + 3)] + if self.west is not None: + comm_send += [self.grid.comm.Isend(self.out_w, dest=self.west, tag=4*self.nt + 3)] + comm_recv += [self.grid.comm.Irecv(self.in_w, source=self.west, tag=4*self.nt + 2)] + #Wait for incoming transfers to complete for comm in comm_recv: comm.wait() #Upload to the GPU - for k in range(self.nvars): - self.sim.u0[k].upload(self.sim.stream, self.in_e[k,:,:], extent=self.write_e) - self.sim.u0[k].upload(self.sim.stream, self.in_w[k,:,:], extent=self.write_w) + if self.east is not None: + for k in range(self.nvars): + self.sim.u0[k].upload(self.sim.stream, self.in_e[k,:,:], extent=self.write_e) + if self.west is not None: + for k in range(self.nvars): + self.sim.u0[k].upload(self.sim.stream, self.in_w[k,:,:], extent=self.write_w) #Wait for sending to complete for comm in comm_send: diff --git a/GPUSimulators/Simulator.py b/GPUSimulators/Simulator.py index a9f8c5a..535bec3 100644 --- a/GPUSimulators/Simulator.py +++ b/GPUSimulators/Simulator.py @@ -51,8 +51,6 @@ class BoundaryCondition(object): Periodic = 2, Reflective = 3 - - def __init__(self, types={ 'north': Type.Reflective, 'south': Type.Reflective, @@ -84,14 +82,23 @@ class BoundaryCondition(object): bc = 0 bc = bc | (self.north & 0x0000000F) << 24 bc = bc | (self.south & 0x0000000F) << 16 - bc = bc | (self.east & 0x0000000F) << 8 - bc = bc | (self.west & 0x0000000F) + bc = bc | (self.east & 0x0000000F) << 8 + bc = bc | (self.west & 0x0000000F) << 0 #for t in types: # print("{0:s}, {1:d}, {1:032b}, {1:08b}".format(t, types[t])) #print("bc: {0:032b}".format(bc)) return np.int32(bc) + + def getTypes(bc): + types = {} + types['north'] = BoundaryCondition.Type((bc >> 24) & 0x0000000F) + types['south'] = BoundaryCondition.Type((bc >> 16) & 0x0000000F) + types['east'] = BoundaryCondition.Type((bc >> 8) & 0x0000000F) + types['west'] = BoundaryCondition.Type((bc >> 0) & 0x0000000F) + return types + @@ -105,6 +112,7 @@ class BaseSimulator(object): context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, num_substeps, block_width, block_height): @@ -134,7 +142,7 @@ class BaseSimulator(object): self.ny = np.int32(ny) self.dx = np.float32(dx) self.dy = np.float32(dy) - self.dt = None + self.setBoundaryConditions(boundary_conditions) self.cfl_scale = cfl_scale self.num_substeps = num_substeps @@ -233,6 +241,13 @@ class BaseSimulator(object): def getExtent(self): return [0, 0, self.nx*self.dx, self.ny*self.dy] + def setBoundaryConditions(self, boundary_conditions): + self.logger.debug("Boundary conditions set to {:s}".format(str(boundary_conditions))) + self.boundary_conditions = boundary_conditions.asCodedInt() + + def getBoundaryConditions(self): + return BoundaryCondition(BoundaryCondition.getTypes(self.boundary_conditions)) + def substep(self, dt, step_number): """ Function which performs one single substep with stepsize dt diff --git a/GPUSimulators/WAF.py b/GPUSimulators/WAF.py index 89886ec..22860fc 100644 --- a/GPUSimulators/WAF.py +++ b/GPUSimulators/WAF.py @@ -61,11 +61,11 @@ class WAF (Simulator.BaseSimulator): super().__init__(context, nx, ny, dx, dy, + boundary_conditions, cfl_scale, 2, block_width, block_height); self.g = np.float32(g) - self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels module = context.get_module("cuda/SWE2D_WAF.cu", diff --git a/GPUSimulators/cuda/common.h b/GPUSimulators/cuda/common.h index 22678ea..d7c4825 100644 --- a/GPUSimulators/cuda/common.h +++ b/GPUSimulators/cuda/common.h @@ -114,19 +114,19 @@ enum BoundaryCondition { }; inline __device__ BoundaryCondition getBCNorth(int bc_) { - return static_cast(bc_ & 0x0000000F); + return static_cast((bc_ >> 24) & 0x0000000F); } inline __device__ BoundaryCondition getBCSouth(int bc_) { - return static_cast((bc_ >> 8) & 0x0000000F); -} - -inline __device__ BoundaryCondition getBCEast(int bc_) { return static_cast((bc_ >> 16) & 0x0000000F); } +inline __device__ BoundaryCondition getBCEast(int bc_) { + return static_cast((bc_ >> 8) & 0x0000000F); +} + inline __device__ BoundaryCondition getBCWest(int bc_) { - return static_cast(bc_ >> 24); + return static_cast((bc_ >> 0) & 0x0000000F); } @@ -404,9 +404,9 @@ __device__ void bcSouthReflective(float Q[h+2*gc_y][w+2*gc_x], } } } - - - + + + // North boundary template diff --git a/GPUSimulators/helpers/InitialConditions.py b/GPUSimulators/helpers/InitialConditions.py index 6764720..c34442b 100644 --- a/GPUSimulators/helpers/InitialConditions.py +++ b/GPUSimulators/helpers/InitialConditions.py @@ -234,7 +234,7 @@ def genKelvinHelmholtz(nx, ny, gamma, roughness=0.125, grid=None): - x0, x1, y0, y1, _, _ = getExtent(1.0, 1.0, nx, ny, grid) + x0, x1, y0, y1, _, dy = getExtent(1.0, 1.0, nx, ny, grid) x = np.linspace(x0, x1, nx) y = np.linspace(y0, y1, ny) _, y = np.meshgrid(x, y)