Removed fixed timestep size

This commit is contained in:
André R. Brodtkorb 2018-11-15 17:25:01 +01:00
parent ddac53271c
commit 815b4493b5
9 changed files with 79 additions and 50 deletions

View File

@ -58,7 +58,7 @@ class EE2D_KP07_dimsplit (BaseSimulator):
context,
rho, rho_u, rho_v, E,
nx, ny,
dx, dy, dt,
dx, dy,
g,
gamma,
theta=1.3,
@ -67,14 +67,14 @@ class EE2D_KP07_dimsplit (BaseSimulator):
block_width=16, block_height=8):
# Call super constructor
super().__init__(context, \
nx, ny, \
dx, dy, 2*dt, \
super().__init__(context,
nx, ny,
dx, dy,
cfl_scale,
block_width, block_height)
self.g = np.float32(g)
self.gamma = np.float32(gamma)
self.theta = np.float32(theta)
self.cfl_scale = cfl_scale
self.boundary_conditions = boundary_conditions.asCodedInt()
#Get kernels
@ -102,7 +102,10 @@ class EE2D_KP07_dimsplit (BaseSimulator):
2, 2,
[None, None, None, None])
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
self.cfl_data.fill(self.dt, stream=self.stream)
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(gamma*h0)))
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(gamma*h0)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
def step(self, dt):
@ -140,4 +143,4 @@ class EE2D_KP07_dimsplit (BaseSimulator):
def computeDt(self):
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
return max_dt*0.5*self.cfl_scale
return max_dt*0.5

View File

@ -57,7 +57,7 @@ class FORCE (Simulator.BaseSimulator):
context,
h0, hu0, hv0,
nx, ny,
dx, dy, dt,
dx, dy,
g,
cfl_scale=0.9,
boundary_conditions=BoundaryCondition(),
@ -66,10 +66,10 @@ class FORCE (Simulator.BaseSimulator):
# Call super constructor
super().__init__(context,
nx, ny,
dx, dy, dt,
dx, dy,
cfl_scale,
block_width, block_height)
self.g = np.float32(g)
self.cfl_scale = cfl_scale
self.boundary_conditions = boundary_conditions.asCodedInt()
#Get kernels
@ -96,7 +96,10 @@ class FORCE (Simulator.BaseSimulator):
1, 1,
[None, None, None])
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
self.cfl_data.fill(self.dt, stream=self.stream)
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
def step(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
@ -124,4 +127,4 @@ class FORCE (Simulator.BaseSimulator):
def computeDt(self):
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
return max_dt*0.5*self.cfl_scale
return max_dt*0.5

View File

@ -52,7 +52,7 @@ class HLL (Simulator.BaseSimulator):
context,
h0, hu0, hv0,
nx, ny,
dx, dy, dt,
dx, dy,
g,
cfl_scale=0.9,
boundary_conditions=BoundaryCondition(),
@ -61,10 +61,10 @@ class HLL (Simulator.BaseSimulator):
# Call super constructor
super().__init__(context,
nx, ny,
dx, dy, dt,
dx, dy,
cfl_scale,
block_width, block_height);
self.g = np.float32(g)
self.cfl_scale = cfl_scale
self.boundary_conditions = boundary_conditions.asCodedInt()
#Get kernels
@ -91,7 +91,10 @@ class HLL (Simulator.BaseSimulator):
1, 1,
[None, None, None])
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
self.cfl_data.fill(self.dt, stream=self.stream)
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
def step(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
@ -119,4 +122,4 @@ class HLL (Simulator.BaseSimulator):
def computeDt(self):
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
return max_dt*0.5*self.cfl_scale
return max_dt*0.5

View File

@ -54,7 +54,7 @@ class HLL2 (Simulator.BaseSimulator):
context,
h0, hu0, hv0,
nx, ny,
dx, dy, dt,
dx, dy,
g,
theta=1.8,
cfl_scale=0.9,
@ -64,7 +64,8 @@ class HLL2 (Simulator.BaseSimulator):
# Call super constructor
super().__init__(context,
nx, ny,
dx, dy, dt*2,
dx, dy,
cfl_scale,
block_width, block_height);
self.g = np.float32(g)
self.theta = np.float32(theta)
@ -95,7 +96,10 @@ class HLL2 (Simulator.BaseSimulator):
2, 2,
[None, None, None])
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
self.cfl_data.fill(self.dt, stream=self.stream)
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
def step(self, dt):
self.substepDimsplit(dt*0.5, 0)
@ -130,4 +134,4 @@ class HLL2 (Simulator.BaseSimulator):
def computeDt(self):
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
return max_dt*0.5*self.cfl_scale
return max_dt*0.5

View File

@ -55,7 +55,7 @@ class KP07 (Simulator.BaseSimulator):
context,
h0, hu0, hv0,
nx, ny,
dx, dy, dt,
dx, dy,
g,
theta=1.3,
cfl_scale=0.9,
@ -66,11 +66,11 @@ class KP07 (Simulator.BaseSimulator):
# Call super constructor
super().__init__(context,
nx, ny,
dx, dy, dt,
dx, dy,
cfl_scale,
block_width, block_height);
self.g = np.float32(g)
self.theta = np.float32(theta)
self.cfl_scale = cfl_scale
self.order = np.int32(order)
self.boundary_conditions = boundary_conditions.asCodedInt()
@ -98,7 +98,10 @@ class KP07 (Simulator.BaseSimulator):
2, 2,
[None, None, None])
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
self.cfl_data.fill(self.dt, stream=self.stream)
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
def step(self, dt):
@ -140,4 +143,4 @@ class KP07 (Simulator.BaseSimulator):
def computeDt(self):
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
return max_dt*0.5**self.order*self.cfl_scale
return max_dt*0.5**self.order

View File

@ -38,7 +38,7 @@ from pycuda import gpuarray
"""
Class that solves the SW equations using the dimensionally split KP07 scheme
"""
class KP07_dimsplit (Simulator.BaseSimulator):
class KP07_dimsplit(Simulator.BaseSimulator):
"""
Initialization routine
@ -56,7 +56,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
context,
h0, hu0, hv0,
nx, ny,
dx, dy, dt,
dx, dy,
g,
theta=1.3,
cfl_scale=0.9,
@ -66,13 +66,13 @@ class KP07_dimsplit (Simulator.BaseSimulator):
# Call super constructor
super().__init__(context,
nx, ny,
dx, dy, dt*2,
dx, dy,
cfl_scale,
block_width, block_height)
self.gc_x = 2
self.gc_y = 2
self.g = np.float32(g)
self.theta = np.float32(theta)
self.cfl_scale = cfl_scale
self.boundary_conditions = boundary_conditions.asCodedInt()
#Get kernels
@ -99,7 +99,10 @@ class KP07_dimsplit (Simulator.BaseSimulator):
self.gc_x, self.gc_y,
[None, None, None])
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
self.cfl_data.fill(self.dt, stream=self.stream)
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
def step(self, dt):
self.substepDimsplit(dt*0.5, 0)
@ -135,4 +138,4 @@ class KP07_dimsplit (Simulator.BaseSimulator):
def computeDt(self):
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
return max_dt*0.5*self.cfl_scale
return max_dt*0.5

View File

@ -53,7 +53,7 @@ class LxF (Simulator.BaseSimulator):
context,
h0, hu0, hv0,
nx, ny,
dx, dy, dt,
dx, dy,
g,
cfl_scale=0.9,
boundary_conditions=BoundaryCondition(),
@ -62,10 +62,10 @@ class LxF (Simulator.BaseSimulator):
# Call super constructor
super().__init__(context,
nx, ny,
dx, dy, dt,
dx, dy,
cfl_scale,
block_width, block_height);
self.g = np.float32(g)
self.cfl_scale = cfl_scale
self.boundary_conditions = boundary_conditions.asCodedInt()
# Get kernels
@ -92,7 +92,10 @@ class LxF (Simulator.BaseSimulator):
1, 1,
[None, None, None])
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
self.cfl_data.fill(self.dt, stream=self.stream)
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
def step(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
@ -116,4 +119,4 @@ class LxF (Simulator.BaseSimulator):
def computeDt(self):
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
return max_dt*0.5*self.cfl_scale
return max_dt*0.5

View File

@ -104,7 +104,8 @@ class BaseSimulator(object):
def __init__(self,
context,
nx, ny,
dx, dy, dt,
dx, dy,
cfl_scale,
block_width, block_height):
"""
Initialization routine
@ -130,7 +131,7 @@ class BaseSimulator(object):
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.cfl_scale = cfl_scale
#Handle autotuning block size
if (self.context.autotuner):
@ -168,20 +169,22 @@ class BaseSimulator(object):
t_end = self.simTime() + t
dt = None
while(self.simTime() < t_end):
if (self.simSteps() % 100 == 0):
self.dt = self.computeDt()
dt = self.computeDt()*self.cfl_scale
# Compute timestep for "this" iteration (i.e., shorten last timestep)
local_dt = np.float32(min(self.dt, t_end-self.simTime()))
dt = np.float32(min(dt, t_end-self.simTime()))
# Stop if end reached (should not happen)
if (local_dt <= 0.0):
if (dt <= 0.0):
self.logger.warning("Timestep size {:d} is less than or equal to zero!".format(self.simSteps()))
break
# Step forward in time
self.step(local_dt)
self.step(dt)
#Print info
print_string = printer.getPrintString(t_end - self.simTime())

View File

@ -51,7 +51,7 @@ class WAF (Simulator.BaseSimulator):
context,
h0, hu0, hv0,
nx, ny,
dx, dy, dt,
dx, dy,
g,
cfl_scale=0.9,
boundary_conditions=BoundaryCondition(),
@ -60,10 +60,10 @@ class WAF (Simulator.BaseSimulator):
# Call super constructor
super().__init__(context,
nx, ny,
dx, dy, dt*2,
dx, dy,
cfl_scale,
block_width, block_height);
self.g = np.float32(g)
self.cfl_scale = cfl_scale
self.boundary_conditions = boundary_conditions.asCodedInt()
#Get kernels
@ -78,7 +78,7 @@ class WAF (Simulator.BaseSimulator):
},
jit_compile_args={})
self.kernel = module.get_function("WAFKernel")
self.kernel.prepare("iiffffiiPiPiPiPiPiPi")
self.kernel.prepare("iiffffiiPiPiPiPiPiPiP")
#Create data by uploading to device
self.u0 = Common.ArakawaA2D(self.stream,
@ -90,7 +90,10 @@ class WAF (Simulator.BaseSimulator):
2, 2,
[None, None, None])
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
self.cfl_data.fill(self.dt, stream=self.stream)
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
def step(self, dt):
self.substepDimsplit(dt*0.5, substep=0)
@ -110,7 +113,8 @@ class WAF (Simulator.BaseSimulator):
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
self.cfl_data.gpudata)
self.u0, self.u1 = self.u1, self.u0
def download(self):
@ -122,4 +126,4 @@ class WAF (Simulator.BaseSimulator):
def computeDt(self):
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
return max_dt*0.5*self.cfl_scale
return max_dt*0.5