Implemented variable timestep

2025-11-29 17:28:03 +01:00 · 2018-11-15 16:48:47 +01:00
parent 7592ad5b9f
commit ddac53271c
15 changed files with 224 additions and 29 deletions
--- a/GPUSimulators/cuda/SWE2D_FORCE.cu
+++ b/GPUSimulators/cuda/SWE2D_FORCE.cu
@@ -97,7 +97,10 @@ __global__ void FORCEKernel(
        //Output h^{n+1}
        float* h1_ptr_, int h1_pitch_,
        float* hu1_ptr_, int hu1_pitch_,
-        float* hv1_ptr_, int hv1_pitch_) {
+        float* hv1_ptr_, int hv1_pitch_,
+        
+        //Output CFL
+        float* cfl_) {
    
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
@@ -130,6 +133,11 @@ __global__ void FORCEKernel(
    writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
+    
+    //Compute the CFL for this block
+    if (cfl_ != NULL) {
+        writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_);
+    }
 }

 } // extern "C"
--- a/GPUSimulators/cuda/SWE2D_HLL.cu
+++ b/GPUSimulators/cuda/SWE2D_HLL.cu
@@ -113,7 +113,10 @@ __global__ void HLLKernel(
        //Output h^{n+1}
        float* h1_ptr_, int h1_pitch_,
        float* hu1_ptr_, int hu1_pitch_,
-        float* hv1_ptr_, int hv1_pitch_) {
+        float* hv1_ptr_, int hv1_pitch_,
+        
+        //Output CFL
+        float* cfl_) {
    
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
@@ -148,6 +151,11 @@ __global__ void HLLKernel(
    writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
+    
+    //Compute the CFL for this block
+    if (cfl_ != NULL) {
+        writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_);
+    }
 }

 } // extern "C"
--- a/GPUSimulators/cuda/SWE2D_HLL2.cu
+++ b/GPUSimulators/cuda/SWE2D_HLL2.cu
@@ -141,7 +141,10 @@ __global__ void HLL2Kernel(
        //Output h^{n+1}
        float* h1_ptr_, int h1_pitch_,
        float* hu1_ptr_, int hu1_pitch_,
-        float* hv1_ptr_, int hv1_pitch_) {
+        float* hv1_ptr_, int hv1_pitch_,
+        
+        //Output CFL
+        float* cfl_) {
    
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
@@ -203,6 +206,11 @@ __global__ void HLL2Kernel(
    writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
+    
+    //Compute the CFL for this block
+    if (cfl_ != NULL) {
+        writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_);
+    }
 }

 } // extern "C"
--- a/GPUSimulators/cuda/SWE2D_KP07.cu
+++ b/GPUSimulators/cuda/SWE2D_KP07.cu
@@ -151,8 +151,10 @@ __global__ void KP07Kernel(
        //Output h^{n+1}
        float* h1_ptr_, int h1_pitch_,
        float* hu1_ptr_, int hu1_pitch_,
-        float* hv1_ptr_, int hv1_pitch_) {
-            
+        float* hv1_ptr_, int hv1_pitch_,
+        
+        //Output CFL
+        float* cfl_) {
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
    const unsigned int gc_x = 2;
@@ -222,5 +224,10 @@ __global__ void KP07Kernel(
            hv_row[ti] = hv1;
        }
    }
+    
+    //Compute the CFL for this block
+    if (cfl_ != NULL) {
+        writeCfl<w, h, gc_x, gc_y, vars>(Q, Q[0], nx_, ny_, dx_, dy_, g_, cfl_);
+    }
 }
 } //extern "C"
--- a/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu
+++ b/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu
@@ -138,7 +138,10 @@ __global__ void KP07DimsplitKernel(
        //Output h^{n+1}
        float* h1_ptr_, int h1_pitch_,
        float* hu1_ptr_, int hu1_pitch_,
-        float* hv1_ptr_, int hv1_pitch_) {
+        float* hv1_ptr_, int hv1_pitch_, 
+        
+        //Output CFL
+        float* cfl_) {
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
    const unsigned int gc_x = 2;
@@ -194,6 +197,11 @@ __global__ void KP07DimsplitKernel(
    writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
+    
+    //Compute the CFL for this block
+    if (cfl_ != NULL) {
+        writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_);
+    }
 }


--- a/GPUSimulators/cuda/SWE2D_LxF.cu
+++ b/GPUSimulators/cuda/SWE2D_LxF.cu
@@ -114,7 +114,10 @@ void LxFKernel(
        //Output h^{n+1}
        float* h1_ptr_, int h1_pitch_,
        float* hu1_ptr_, int hu1_pitch_,
-        float* hv1_ptr_, int hv1_pitch_) {
+        float* hv1_ptr_, int hv1_pitch_,
+        
+        //Output CFL
+        float* cfl_) {
    
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
@@ -154,6 +157,11 @@ void LxFKernel(
    writeBlock<w, h, gc_x, gc_y>( h1_ptr_,  h1_pitch_, Q[0], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
    writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
+    
+    //Compute the CFL for this block
+    if (cfl_ != NULL) {
+        writeCfl<w, h, gc_x, gc_y, vars>(Q, Q[0], nx_, ny_, dx_, dy_, g_, cfl_);
+    }
 }

 } // extern "C"
--- a/GPUSimulators/cuda/SWECommon.h
+++ b/GPUSimulators/cuda/SWECommon.h
@@ -468,3 +468,66 @@ __device__ float3 FORCE_1D_flux(const float3 Q_l, const float3 Q_r, const float
    const float3 F_lw2 = LxW2_1D_flux(Q_l, Q_r, g_, dx_, dt_);
    return 0.5f*(F_lf + F_lw2);
 }
+
+//Computes one time-step bound per thread block and stores it in output_.
+//For every cell passing the domain check the bound is the smaller of
+//dx_/(|u|+sqrt(g_*h)) and dy_/(|v|+sqrt(g_*h)), with h=Q[0], u=Q[1]/h, v=Q[2]/h.
+//The block minimum goes to output_[gridDim.x*blockIdx.y + blockIdx.x].
+//shmem is scratch space and is overwritten; callers in this change pass F[0] or
+//Q[0]. Passing Q[0] works because a thread reads its Q cell before writing it.
+//Contains __syncthreads(): all threads of the block must reach the call.
+//NOTE(review): no guard for h <= 0. With h == 0 and zero momentum the bound is
+//NaN, which fminf() skips in favour of its numeric argument - confirm intent.
+template<int w, int h, int gc_x, int gc_y, int vars>
+__device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x],
+        float shmem[h+2*gc_y][w+2*gc_x],
+        const int nx_, const int ny_,
+        const float dx_, const float dy_, const float g_,
+        float* output_) {
+    //Index of this thread's cell within the block arrays (offset by gc_x/gc_y)
+    const int tx = threadIdx.x + gc_x;
+    const int ty = threadIdx.y + gc_y;
+    
+    //Index of this thread's cell within the whole domain (same offset applied)
+    const int ti = blockDim.x*blockIdx.x + tx;
+    const int tj = blockDim.y*blockIdx.y + ty;
+    
+    //Step 1: each thread below the nx_+gc_x / ny_+gc_y limits stores its own bound
+    if (ti < nx_+gc_x && tj < ny_+gc_y) {
+        const float h = Q[0][ty][tx]; //NOTE(review): hides template parameter h in this scope only
+        const float u   = Q[1][ty][tx] / h;
+        const float v   = Q[2][ty][tx] / h;
+        
+        //Despite the names, max_u/max_v are step-size bounds, not velocities
+        const float max_u = dx_ / (fabsf(u) + sqrtf(g_*h));
+        const float max_v = dy_ / (fabsf(v) + sqrtf(g_*h));
+        
+        shmem[ty][tx] = fminf(max_u, max_v);
+    }
+    __syncthreads(); //every per-cell bound must be in shmem before columns are scanned
+    
+    //Step 2: threads with ty == gc_y each reduce their own column to its minimum
+    if (ti < nx_+gc_x && tj < ny_+gc_y) {
+        if (ty == gc_y) {
+            float min_val = shmem[ty][tx];
+            const int max_y = min(h, ny_+gc_y - tj); //h is the template parameter here; fewer rows if the domain ends first
+            for (int j=gc_y; j<max_y+gc_y; j++) {
+                min_val = fminf(min_val, shmem[j][tx]);
+            }
+            shmem[ty][tx] = min_val; //column minimum now sits in row gc_y
+        }
+    }
+    __syncthreads(); //column minima must be complete before the final row scan
+    
+    //Step 3: the thread with threadIdx (0,0) scans row gc_y for the block minimum
+    if (tx == gc_x && ty == gc_y) {
+        float min_val = shmem[ty][tx];
+        const int max_x = min(w, nx_+gc_x - ti); //at most w columns; fewer if the domain ends first
+        for (int i=gc_x; i<max_x+gc_x; ++i) {
+            min_val = fminf(min_val, shmem[ty][i]);
+        }
+        
+        //One output element per block, indexed row by row over the grid of blocks
+        const int idx = gridDim.x*blockIdx.y + blockIdx.x;
+        output_[idx] = min_val;
+    }
+}
+