Refactoring

2025-05-18 14:34:13 +02:00 · 2018-10-31 15:49:10 +01:00 · 2018-10-31 15:49:10 +01:00 · 064027fc0b
commit 064027fc0b
parent 2d8858e7e6
7 changed files with 34 additions and 69 deletions
--- a/GPUSimulators/cuda/SWE2D_FORCE.cu
+++ b/GPUSimulators/cuda/SWE2D_FORCE.cu
@@ -117,6 +117,7 @@ __global__ void FORCEKernel(
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
    const unsigned int gc = 1;
+    const unsigned int vars = 3;
    __shared__ float Q[3][h+2][w+2];
    __shared__ float F[3][h+1][w+1];
@@ -136,10 +137,7 @@ __global__ void FORCEKernel(
    //Compute flux along x, and evolve
    computeFluxF(Q, F, g_, dx_, dt_);
    __syncthreads();
-    
+    evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
-    evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
-    evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
-    evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
    __syncthreads();
    //Set boundary conditions
@@ -151,10 +149,7 @@ __global__ void FORCEKernel(
    //Compute flux along y, and evolve
    computeFluxG(Q, F, g_, dy_, dt_);
    __syncthreads();
-    
+    evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
-    evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
-    evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
-    evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
    __syncthreads();
    //Write to main memory
--- a/GPUSimulators/cuda/SWE2D_HLL.cu
+++ b/GPUSimulators/cuda/SWE2D_HLL.cu
@@ -124,6 +124,7 @@ __global__ void HLLKernel(
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
    const unsigned int gc = 1;
+    const unsigned int vars = 3;
    //Shared memory variables
    __shared__ float Q[3][h+2][w+2];
@@ -145,9 +146,7 @@ __global__ void HLLKernel(
    computeFluxF(Q, F, g_);
    __syncthreads();
-    evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
+    evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
-    evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
-    evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
    __syncthreads();
    //Set boundary conditions
@@ -160,9 +159,7 @@ __global__ void HLLKernel(
    computeFluxG(Q, F, g_);
    __syncthreads();
-    evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
+    evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
-    evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
-    evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
    __syncthreads();
    // Write to main memory for all internal cells
--- a/GPUSimulators/cuda/SWE2D_HLL2.cu
+++ b/GPUSimulators/cuda/SWE2D_HLL2.cu
@@ -159,6 +159,7 @@ __global__ void HLL2Kernel(
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
    const unsigned int gc = 2;
+    const unsigned int vars = 3;
    //Shared memory variables
    __shared__ float  Q[3][h+4][w+4];
@@ -184,10 +185,7 @@ __global__ void HLL2Kernel(
        __syncthreads();
        computeFluxF(Q, Qx, F, g_, dx_, dt_);
        __syncthreads();
-        
+        evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
-        evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
-        evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
-        evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
        __syncthreads();
        //Set boundary conditions
@@ -201,10 +199,7 @@ __global__ void HLL2Kernel(
        __syncthreads();
        computeFluxG(Q, Qx, F, g_, dy_, dt_);
        __syncthreads();
-        
+        evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
-        evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
-        evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
-        evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
        __syncthreads();
    }
    //Step 1 => evolve y first, then x
@@ -214,10 +209,7 @@ __global__ void HLL2Kernel(
        __syncthreads();
        computeFluxG(Q, Qx, F, g_, dy_, dt_);
        __syncthreads();
-        
+        evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
-        evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
-        evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
-        evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
        __syncthreads();
        //Set boundary conditions
@@ -231,10 +223,7 @@ __global__ void HLL2Kernel(
        __syncthreads();
        computeFluxF(Q, Qx, F, g_, dx_, dt_);
        __syncthreads();
-        
+        evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
-        evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
-        evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
-        evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
        __syncthreads();
    }
--- a/GPUSimulators/cuda/SWE2D_KP07.cu
+++ b/GPUSimulators/cuda/SWE2D_KP07.cu
@@ -122,6 +122,7 @@ __global__ void KP07Kernel(
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
    const unsigned int gc = 2;
+    const unsigned int vars = 3;
    //Index of thread within block
    const int tx = threadIdx.x;
--- a/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu
+++ b/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu
@@ -150,6 +150,7 @@ __global__ void KP07DimsplitKernel(
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
    const unsigned int gc = 2;
+    const unsigned int vars = 3;
    //Shared memory variables
@@ -181,9 +182,7 @@ __global__ void KP07DimsplitKernel(
        __syncthreads();
        computeFluxF(Q, Qx, F, g_, dx_, dt_);
        __syncthreads();
-        evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
+        evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
-        evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
-        evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
        __syncthreads();
        //Set boundary conditions
@@ -200,10 +199,7 @@ __global__ void KP07DimsplitKernel(
        computeFluxG(Q, Qx, F, g_, dy_, dt_);
        __syncthreads();
-        
+        evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
-        evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
-        evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
-        evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
        __syncthreads();
    }
    //Step 1 => evolve y first, then x
@@ -213,10 +209,7 @@ __global__ void KP07DimsplitKernel(
        __syncthreads();
        computeFluxG(Q, Qx, F, g_, dy_, dt_);
        __syncthreads();
-        
+        evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
-        evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
-        evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
-        evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
        __syncthreads();
        //Set boundary conditions
@@ -230,9 +223,7 @@ __global__ void KP07DimsplitKernel(
        __syncthreads();
        computeFluxF(Q, Qx, F, g_, dx_, dt_);
        __syncthreads();
-        evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
+        evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
-        evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
-        evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
        __syncthreads();
    }
--- a/GPUSimulators/cuda/SWE2D_WAF.cu
+++ b/GPUSimulators/cuda/SWE2D_WAF.cu
@@ -134,6 +134,7 @@ __global__ void WAFKernel(
    const unsigned int w = BLOCK_WIDTH;
    const unsigned int h = BLOCK_HEIGHT;
    const unsigned int gc = 2;
+    const unsigned int vars = 3;
    //Shared memory variables
    __shared__ float Q[3][h+4][w+4];
@@ -161,10 +162,7 @@ __global__ void WAFKernel(
        //Compute fluxes along the x axis and evolve
        computeFluxF(Q, F, g_, dx_, dt_);
        __syncthreads();
-        
+        evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
-        evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
-        evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
-        evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
        __syncthreads();
        //Fix boundary conditions
@@ -176,10 +174,7 @@ __global__ void WAFKernel(
        //Compute fluxes along the y axis and evolve
        computeFluxG(Q, F, g_, dy_, dt_);
        __syncthreads();
-        
+        evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
-        evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
-        evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
-        evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
        __syncthreads();
    }
    //Step 1 => evolve y first, then x
@@ -187,10 +182,7 @@ __global__ void WAFKernel(
        //Compute fluxes along the y axis and evolve
        computeFluxG(Q, F, g_, dy_, dt_);
        __syncthreads();
-        
+        evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
-        evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
-        evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
-        evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
        __syncthreads();
        //Fix boundary conditions
@@ -202,10 +194,7 @@ __global__ void WAFKernel(
        //Compute fluxes along the x axis and evolve
        computeFluxF(Q, F, g_, dx_, dt_);
        __syncthreads();
-        
+        evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
-        evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
-        evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
-        evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
        __syncthreads();
    }
--- a/GPUSimulators/cuda/common.h
+++ b/GPUSimulators/cuda/common.h
@@ -245,9 +245,9 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
-template<int block_width, int block_height, int ghost_cells>
+template<int block_width, int block_height, int ghost_cells, int vars>
-__device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells],
+__device__ void evolveF(float Q[vars][block_height+2*ghost_cells][block_width+2*ghost_cells],
-              float F[block_height+1][block_width+1],
+              float F[vars][block_height+1][block_width+1],
              const float dx_, const float dt_) {
    //Index of thread within block
    const int tx = threadIdx.x;
@@ -260,8 +260,9 @@ __device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_
    //const int ti = blockDim.x*blockIdx.x + threadIdx.x + ghost_cells; //Skip global ghost cells, i.e., +1
    //const int tj = blockDim.y*blockIdx.y + threadIdx.y + ghost_cells;
    //if (ti > ghost_cells-1 && ti < nx_+ghost_cells && tj > ghost_cells-1 && tj < ny_+ghost_cells) {
-    Q[j][i] = Q[j][i] + (F[ty][tx] - F[ty][tx+1]) * dt_ / dx_;
+    for (int var=0; var < vars; ++var) {
-    
+        Q[var][j][i] = Q[var][j][i] + (F[var][ty][tx] - F[var][ty][tx+1]) * dt_ / dx_;
+    }
 }
@@ -272,9 +273,9 @@ __device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_
 /**
  * Evolves the solution in time along the y axis (dimensional splitting)
  */
-template<int block_width, int block_height, int ghost_cells>
+template<int block_width, int block_height, int ghost_cells, int vars>
-__device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells],
+__device__ void evolveG(float Q[vars][block_height+2*ghost_cells][block_width+2*ghost_cells],
-              float G[block_height+1][block_width+1],
+              float G[vars][block_height+1][block_width+1],
              const float dy_, const float dt_) {
    //Index of thread within block
    const int tx = threadIdx.x;
@@ -283,7 +284,9 @@ __device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_
    const int i = tx + ghost_cells; //Skip local ghost cells, i.e., +1
    const int j = ty + ghost_cells;
-    Q[j][i] = Q[j][i] + (G[ty][tx] - G[ty+1][tx]) * dt_ / dy_;
+    for (int var=0; var < vars; ++var) {
+        Q[var][j][i] = Q[var][j][i] + (G[var][ty][tx] - G[var][ty+1][tx]) * dt_ / dy_;
+    }
 }