diff --git a/GPUSimulators/EE2D_KP07_dimsplit.py b/GPUSimulators/EE2D_KP07_dimsplit.py index f1b5e1c..ccf826a 100644 --- a/GPUSimulators/EE2D_KP07_dimsplit.py +++ b/GPUSimulators/EE2D_KP07_dimsplit.py @@ -58,7 +58,7 @@ class EE2D_KP07_dimsplit (BaseSimulator): context, rho, rho_u, rho_v, E, nx, ny, - dx, dy, dt, + dx, dy, g, gamma, theta=1.3, @@ -67,14 +67,14 @@ class EE2D_KP07_dimsplit (BaseSimulator): block_width=16, block_height=8): # Call super constructor - super().__init__(context, \ - nx, ny, \ - dx, dy, 2*dt, \ + super().__init__(context, + nx, ny, + dx, dy, + cfl_scale, block_width, block_height) self.g = np.float32(g) self.gamma = np.float32(gamma) self.theta = np.float32(theta) - self.cfl_scale = cfl_scale self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels @@ -102,7 +102,12 @@ class EE2D_KP07_dimsplit (BaseSimulator): 2, 2, [None, None, None, None]) self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) - self.cfl_data.fill(self.dt, stream=self.stream) + P = (gamma - 1.0)*(E - 0.5*(rho_u**2 + rho_v**2)/rho) + c = np.sqrt(gamma*P/rho) + dt_x = np.min(self.dx / (np.abs(rho_u/rho) + c)) + dt_y = np.min(self.dy / (np.abs(rho_v/rho) + c)) + dt = min(dt_x, dt_y) + self.cfl_data.fill(dt, stream=self.stream) def step(self, dt): @@ -140,4 +143,4 @@ class EE2D_KP07_dimsplit (BaseSimulator): def computeDt(self): max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); - return max_dt*0.5*self.cfl_scale \ No newline at end of file + return max_dt*0.5 \ No newline at end of file diff --git a/GPUSimulators/FORCE.py b/GPUSimulators/FORCE.py index dc66b38..e6d7e94 100644 --- a/GPUSimulators/FORCE.py +++ b/GPUSimulators/FORCE.py @@ -57,7 +57,7 @@ class FORCE (Simulator.BaseSimulator): context, h0, hu0, hv0, nx, ny, - dx, dy, dt, + dx, dy, g, cfl_scale=0.9, boundary_conditions=BoundaryCondition(), @@ -66,10 +66,10 @@ class FORCE (Simulator.BaseSimulator): # Call super constructor super().__init__(context, nx, ny, - dx, dy, dt, + dx, dy, + cfl_scale, block_width, block_height) self.g = 
np.float32(g) - self.cfl_scale = cfl_scale self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels @@ -96,7 +96,10 @@ class FORCE (Simulator.BaseSimulator): 1, 1, [None, None, None]) self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) - self.cfl_data.fill(self.dt, stream=self.stream) + dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) + dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) + dt = min(dt_x, dt_y) + self.cfl_data.fill(dt, stream=self.stream) def step(self, dt): self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, @@ -124,4 +127,4 @@ class FORCE (Simulator.BaseSimulator): def computeDt(self): max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); - return max_dt*0.5*self.cfl_scale \ No newline at end of file + return max_dt*0.5 \ No newline at end of file diff --git a/GPUSimulators/HLL.py b/GPUSimulators/HLL.py index c76074a..26bfa11 100644 --- a/GPUSimulators/HLL.py +++ b/GPUSimulators/HLL.py @@ -52,7 +52,7 @@ class HLL (Simulator.BaseSimulator): context, h0, hu0, hv0, nx, ny, - dx, dy, dt, + dx, dy, g, cfl_scale=0.9, boundary_conditions=BoundaryCondition(), @@ -61,10 +61,10 @@ class HLL (Simulator.BaseSimulator): # Call super constructor super().__init__(context, nx, ny, - dx, dy, dt, + dx, dy, + cfl_scale, block_width, block_height); self.g = np.float32(g) - self.cfl_scale = cfl_scale self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels @@ -91,7 +91,10 @@ class HLL (Simulator.BaseSimulator): 1, 1, [None, None, None]) self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) - self.cfl_data.fill(self.dt, stream=self.stream) + dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) + dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) + dt = min(dt_x, dt_y) + self.cfl_data.fill(dt, stream=self.stream) def step(self, dt): self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, @@ -119,4 +122,4 @@ class HLL 
(Simulator.BaseSimulator): def computeDt(self): max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); - return max_dt*0.5*self.cfl_scale \ No newline at end of file + return max_dt*0.5 \ No newline at end of file diff --git a/GPUSimulators/HLL2.py b/GPUSimulators/HLL2.py index 3894116..02edaf7 100644 --- a/GPUSimulators/HLL2.py +++ b/GPUSimulators/HLL2.py @@ -54,7 +54,7 @@ class HLL2 (Simulator.BaseSimulator): context, h0, hu0, hv0, nx, ny, - dx, dy, dt, + dx, dy, g, theta=1.8, cfl_scale=0.9, @@ -64,7 +64,8 @@ class HLL2 (Simulator.BaseSimulator): # Call super constructor super().__init__(context, nx, ny, - dx, dy, dt*2, + dx, dy, + cfl_scale, block_width, block_height); self.g = np.float32(g) self.theta = np.float32(theta) @@ -95,7 +96,10 @@ class HLL2 (Simulator.BaseSimulator): 2, 2, [None, None, None]) self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) - self.cfl_data.fill(self.dt, stream=self.stream) + dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) + dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) + dt = min(dt_x, dt_y) + self.cfl_data.fill(dt, stream=self.stream) def step(self, dt): self.substepDimsplit(dt*0.5, 0) @@ -130,4 +134,4 @@ class HLL2 (Simulator.BaseSimulator): def computeDt(self): max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); - return max_dt*0.5*self.cfl_scale \ No newline at end of file + return max_dt*0.5 \ No newline at end of file diff --git a/GPUSimulators/KP07.py b/GPUSimulators/KP07.py index ad4faef..a9d5912 100644 --- a/GPUSimulators/KP07.py +++ b/GPUSimulators/KP07.py @@ -55,7 +55,7 @@ class KP07 (Simulator.BaseSimulator): context, h0, hu0, hv0, nx, ny, - dx, dy, dt, + dx, dy, g, theta=1.3, cfl_scale=0.9, @@ -66,11 +66,11 @@ class KP07 (Simulator.BaseSimulator): # Call super constructor super().__init__(context, nx, ny, - dx, dy, dt, + dx, dy, + cfl_scale, block_width, block_height); self.g = np.float32(g) self.theta = np.float32(theta) - self.cfl_scale = cfl_scale self.order 
= np.int32(order) self.boundary_conditions = boundary_conditions.asCodedInt() @@ -98,7 +98,10 @@ class KP07 (Simulator.BaseSimulator): 2, 2, [None, None, None]) self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) - self.cfl_data.fill(self.dt, stream=self.stream) + dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) + dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) + dt = min(dt_x, dt_y) + self.cfl_data.fill(dt, stream=self.stream) def step(self, dt): @@ -140,4 +143,4 @@ class KP07 (Simulator.BaseSimulator): def computeDt(self): max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); - return max_dt*0.5**self.order*self.cfl_scale \ No newline at end of file + return max_dt*0.5**self.order \ No newline at end of file diff --git a/GPUSimulators/KP07_dimsplit.py b/GPUSimulators/KP07_dimsplit.py index fd52f8a..000e6c7 100644 --- a/GPUSimulators/KP07_dimsplit.py +++ b/GPUSimulators/KP07_dimsplit.py @@ -38,7 +38,7 @@ from pycuda import gpuarray """ Class that solves the SW equations using the dimentionally split KP07 scheme """ -class KP07_dimsplit (Simulator.BaseSimulator): +class KP07_dimsplit(Simulator.BaseSimulator): """ Initialization routine @@ -56,7 +56,7 @@ class KP07_dimsplit (Simulator.BaseSimulator): context, h0, hu0, hv0, nx, ny, - dx, dy, dt, + dx, dy, g, theta=1.3, cfl_scale=0.9, @@ -66,13 +66,13 @@ class KP07_dimsplit (Simulator.BaseSimulator): # Call super constructor super().__init__(context, nx, ny, - dx, dy, dt*2, + dx, dy, + cfl_scale, block_width, block_height) self.gc_x = 2 self.gc_y = 2 self.g = np.float32(g) self.theta = np.float32(theta) - self.cfl_scale = cfl_scale self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels @@ -99,7 +99,10 @@ class KP07_dimsplit (Simulator.BaseSimulator): self.gc_x, self.gc_y, [None, None, None]) self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) - self.cfl_data.fill(self.dt, stream=self.stream) + dt_x = np.min(self.dx / (np.abs(hu0/h0) + 
np.sqrt(g*h0))) + dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) + dt = min(dt_x, dt_y) + self.cfl_data.fill(dt, stream=self.stream) def step(self, dt): self.substepDimsplit(dt*0.5, 0) @@ -135,4 +138,4 @@ class KP07_dimsplit (Simulator.BaseSimulator): def computeDt(self): max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); - return max_dt*0.5*self.cfl_scale \ No newline at end of file + return max_dt*0.5 \ No newline at end of file diff --git a/GPUSimulators/LxF.py b/GPUSimulators/LxF.py index aa71281..62ea04b 100644 --- a/GPUSimulators/LxF.py +++ b/GPUSimulators/LxF.py @@ -53,7 +53,7 @@ class LxF (Simulator.BaseSimulator): context, h0, hu0, hv0, nx, ny, - dx, dy, dt, + dx, dy, g, cfl_scale=0.9, boundary_conditions=BoundaryCondition(), @@ -62,10 +62,10 @@ class LxF (Simulator.BaseSimulator): # Call super constructor super().__init__(context, nx, ny, - dx, dy, dt, + dx, dy, + cfl_scale, block_width, block_height); self.g = np.float32(g) - self.cfl_scale = cfl_scale self.boundary_conditions = boundary_conditions.asCodedInt() # Get kernels @@ -92,7 +92,10 @@ class LxF (Simulator.BaseSimulator): 1, 1, [None, None, None]) self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) - self.cfl_data.fill(self.dt, stream=self.stream) + dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) + dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) + dt = min(dt_x, dt_y) + self.cfl_data.fill(dt, stream=self.stream) def step(self, dt): self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, @@ -116,4 +119,4 @@ class LxF (Simulator.BaseSimulator): def computeDt(self): max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); - return max_dt*0.5*self.cfl_scale \ No newline at end of file + return max_dt*0.5 \ No newline at end of file diff --git a/GPUSimulators/Simulator.py b/GPUSimulators/Simulator.py index bfeedf6..6928d36 100644 --- a/GPUSimulators/Simulator.py +++ b/GPUSimulators/Simulator.py @@ -104,7 +104,8 @@ 
class BaseSimulator(object): def __init__(self, context, nx, ny, - dx, dy, dt, + dx, dy, + cfl_scale, block_width, block_height): """ Initialization routine @@ -130,7 +131,7 @@ class BaseSimulator(object): self.ny = np.int32(ny) self.dx = np.float32(dx) self.dy = np.float32(dy) - self.dt = np.float32(dt) + self.cfl_scale = cfl_scale #Handle autotuning block size if (self.context.autotuner): @@ -168,20 +169,22 @@ class BaseSimulator(object): t_end = self.simTime() + t + dt = self.computeDt()*self.cfl_scale + while(self.simTime() < t_end): if (self.simSteps() % 100 == 0): - self.dt = self.computeDt() + dt = self.computeDt()*self.cfl_scale # Compute timestep for "this" iteration (i.e., shorten last timestep) - local_dt = np.float32(min(self.dt, t_end-self.simTime())) + dt = np.float32(min(dt, t_end-self.simTime())) # Stop if end reached (should not happen) - if (local_dt <= 0.0): + if (dt <= 0.0): self.logger.warning("Timestep size {:d} is less than or equal to zero!".format(self.simSteps())) break # Step forward in time - self.step(local_dt) + self.step(dt) #Print info print_string = printer.getPrintString(t_end - self.simTime()) diff --git a/GPUSimulators/WAF.py b/GPUSimulators/WAF.py index 3de3b58..b7382e4 100644 --- a/GPUSimulators/WAF.py +++ b/GPUSimulators/WAF.py @@ -51,7 +51,7 @@ class WAF (Simulator.BaseSimulator): context, h0, hu0, hv0, nx, ny, - dx, dy, dt, + dx, dy, g, cfl_scale=0.9, boundary_conditions=BoundaryCondition(), @@ -60,10 +60,10 @@ class WAF (Simulator.BaseSimulator): # Call super constructor super().__init__(context, nx, ny, - dx, dy, dt*2, + dx, dy, + cfl_scale, block_width, block_height); self.g = np.float32(g) - self.cfl_scale = cfl_scale self.boundary_conditions = boundary_conditions.asCodedInt() #Get kernels @@ -78,7 +78,7 @@ class WAF (Simulator.BaseSimulator): }, jit_compile_args={}) self.kernel = module.get_function("WAFKernel") - self.kernel.prepare("iiffffiiPiPiPiPiPiPi") + self.kernel.prepare("iiffffiiPiPiPiPiPiPiP") #Create data by uploading to device self.u0 
= Common.ArakawaA2D(self.stream, @@ -90,7 +90,10 @@ class WAF (Simulator.BaseSimulator): 2, 2, [None, None, None]) self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) - self.cfl_data.fill(self.dt, stream=self.stream) + dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) + dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) + dt = min(dt_x, dt_y) + self.cfl_data.fill(dt, stream=self.stream) def step(self, dt): self.substepDimsplit(dt*0.5, substep=0) @@ -110,7 +113,8 @@ class WAF (Simulator.BaseSimulator): self.u0[2].data.gpudata, self.u0[2].data.strides[0], self.u1[0].data.gpudata, self.u1[0].data.strides[0], self.u1[1].data.gpudata, self.u1[1].data.strides[0], - self.u1[2].data.gpudata, self.u1[2].data.strides[0]) + self.u1[2].data.gpudata, self.u1[2].data.strides[0], + self.cfl_data.gpudata) self.u0, self.u1 = self.u1, self.u0 def download(self): @@ -122,4 +126,4 @@ class WAF (Simulator.BaseSimulator): def computeDt(self): max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); - return max_dt*0.5*self.cfl_scale \ No newline at end of file + return max_dt*0.5 \ No newline at end of file