diff --git a/GPUSimulators/cuda/SWE2D_FORCE.cu b/GPUSimulators/cuda/SWE2D_FORCE.cu index 0762029..1db7b17 100644 --- a/GPUSimulators/cuda/SWE2D_FORCE.cu +++ b/GPUSimulators/cuda/SWE2D_FORCE.cu @@ -117,6 +117,7 @@ __global__ void FORCEKernel( const unsigned int w = BLOCK_WIDTH; const unsigned int h = BLOCK_HEIGHT; const unsigned int gc = 1; + const unsigned int vars = 3; __shared__ float Q[3][h+2][w+2]; __shared__ float F[3][h+1][w+1]; @@ -136,10 +137,7 @@ __global__ void FORCEKernel( //Compute flux along x, and evolve computeFluxF(Q, F, g_, dx_, dt_); __syncthreads(); - - evolveF(Q[0], F[0], dx_, dt_); - evolveF(Q[1], F[1], dx_, dt_); - evolveF(Q[2], F[2], dx_, dt_); + evolveF(Q, F, dx_, dt_); __syncthreads(); //Set boundary conditions @@ -151,10 +149,7 @@ __global__ void FORCEKernel( //Compute flux along y, and evolve computeFluxG(Q, F, g_, dy_, dt_); __syncthreads(); - - evolveG(Q[0], F[0], dy_, dt_); - evolveG(Q[1], F[1], dy_, dt_); - evolveG(Q[2], F[2], dy_, dt_); + evolveG(Q, F, dy_, dt_); __syncthreads(); //Write to main memory diff --git a/GPUSimulators/cuda/SWE2D_HLL.cu b/GPUSimulators/cuda/SWE2D_HLL.cu index 8b0c2cb..c05552c 100644 --- a/GPUSimulators/cuda/SWE2D_HLL.cu +++ b/GPUSimulators/cuda/SWE2D_HLL.cu @@ -124,6 +124,7 @@ __global__ void HLLKernel( const unsigned int w = BLOCK_WIDTH; const unsigned int h = BLOCK_HEIGHT; const unsigned int gc = 1; + const unsigned int vars = 3; //Shared memory variables __shared__ float Q[3][h+2][w+2]; @@ -145,9 +146,7 @@ __global__ void HLLKernel( computeFluxF(Q, F, g_); __syncthreads(); - evolveF(Q[0], F[0], dx_, dt_); - evolveF(Q[1], F[1], dx_, dt_); - evolveF(Q[2], F[2], dx_, dt_); + evolveF(Q, F, dx_, dt_); __syncthreads(); //Set boundary conditions @@ -160,9 +159,7 @@ __global__ void HLLKernel( computeFluxG(Q, F, g_); __syncthreads(); - evolveG(Q[0], F[0], dy_, dt_); - evolveG(Q[1], F[1], dy_, dt_); - evolveG(Q[2], F[2], dy_, dt_); + evolveG(Q, F, dy_, dt_); __syncthreads(); // Write to main memory for all internal cells diff --git a/GPUSimulators/cuda/SWE2D_HLL2.cu b/GPUSimulators/cuda/SWE2D_HLL2.cu index 40910cc..dc39089 100644 --- a/GPUSimulators/cuda/SWE2D_HLL2.cu +++ b/GPUSimulators/cuda/SWE2D_HLL2.cu @@ -159,6 +159,7 @@ __global__ void HLL2Kernel( const unsigned int w = BLOCK_WIDTH; const unsigned int h = BLOCK_HEIGHT; const unsigned int gc = 2; + const unsigned int vars = 3; //Shared memory variables __shared__ float Q[3][h+4][w+4]; @@ -184,10 +185,7 @@ __global__ void HLL2Kernel( __syncthreads(); computeFluxF(Q, Qx, F, g_, dx_, dt_); __syncthreads(); - - evolveF(Q[0], F[0], dx_, dt_); - evolveF(Q[1], F[1], dx_, dt_); - evolveF(Q[2], F[2], dx_, dt_); + evolveF(Q, F, dx_, dt_); __syncthreads(); //Set boundary conditions @@ -201,10 +199,7 @@ __global__ void HLL2Kernel( __syncthreads(); computeFluxG(Q, Qx, F, g_, dy_, dt_); __syncthreads(); - - evolveG(Q[0], F[0], dy_, dt_); - evolveG(Q[1], F[1], dy_, dt_); - evolveG(Q[2], F[2], dy_, dt_); + evolveG(Q, F, dy_, dt_); __syncthreads(); } //Step 1 => evolve y first, then x @@ -214,10 +209,7 @@ __global__ void HLL2Kernel( __syncthreads(); computeFluxG(Q, Qx, F, g_, dy_, dt_); __syncthreads(); - - evolveG(Q[0], F[0], dy_, dt_); - evolveG(Q[1], F[1], dy_, dt_); - evolveG(Q[2], F[2], dy_, dt_); + evolveG(Q, F, dy_, dt_); __syncthreads(); //Set boundary conditions @@ -231,10 +223,7 @@ __global__ void HLL2Kernel( __syncthreads(); computeFluxF(Q, Qx, F, g_, dx_, dt_); __syncthreads(); - - evolveF(Q[0], F[0], dx_, dt_); - evolveF(Q[1], F[1], dx_, dt_); - evolveF(Q[2], F[2], dx_, dt_); + evolveF(Q, F, dx_, dt_); __syncthreads(); } diff --git a/GPUSimulators/cuda/SWE2D_KP07.cu b/GPUSimulators/cuda/SWE2D_KP07.cu index 09b44a5..98d2c5b 100644 --- a/GPUSimulators/cuda/SWE2D_KP07.cu +++ b/GPUSimulators/cuda/SWE2D_KP07.cu @@ -122,6 +122,7 @@ __global__ void KP07Kernel( const unsigned int w = BLOCK_WIDTH; const unsigned int h = BLOCK_HEIGHT; const unsigned int gc = 2; + const unsigned int vars = 3; //Index of thread within block const int tx = threadIdx.x; diff --git a/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu b/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu index 2957dcc..2ab0c70 100644 --- a/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu +++ b/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu @@ -150,6 +150,7 @@ __global__ void KP07DimsplitKernel( const unsigned int w = BLOCK_WIDTH; const unsigned int h = BLOCK_HEIGHT; const unsigned int gc = 2; + const unsigned int vars = 3; //Shared memory variables @@ -181,9 +182,7 @@ __global__ void KP07DimsplitKernel( __syncthreads(); computeFluxF(Q, Qx, F, g_, dx_, dt_); __syncthreads(); - evolveF(Q[0], F[0], dx_, dt_); - evolveF(Q[1], F[1], dx_, dt_); - evolveF(Q[2], F[2], dx_, dt_); + evolveF(Q, F, dx_, dt_); __syncthreads(); //Set boundary conditions @@ -200,10 +199,7 @@ __global__ void KP07DimsplitKernel( computeFluxG(Q, Qx, F, g_, dy_, dt_); __syncthreads(); - - evolveG(Q[0], F[0], dy_, dt_); - evolveG(Q[1], F[1], dy_, dt_); - evolveG(Q[2], F[2], dy_, dt_); + evolveG(Q, F, dy_, dt_); __syncthreads(); } //Step 1 => evolve y first, then x @@ -213,10 +209,7 @@ __global__ void KP07DimsplitKernel( __syncthreads(); computeFluxG(Q, Qx, F, g_, dy_, dt_); __syncthreads(); - - evolveG(Q[0], F[0], dy_, dt_); - evolveG(Q[1], F[1], dy_, dt_); - evolveG(Q[2], F[2], dy_, dt_); + evolveG(Q, F, dy_, dt_); __syncthreads(); //Set boundary conditions @@ -230,9 +223,7 @@ __global__ void KP07DimsplitKernel( __syncthreads(); computeFluxF(Q, Qx, F, g_, dx_, dt_); __syncthreads(); - evolveF(Q[0], F[0], dx_, dt_); - evolveF(Q[1], F[1], dx_, dt_); - evolveF(Q[2], F[2], dx_, dt_); + evolveF(Q, F, dx_, dt_); __syncthreads(); } diff --git a/GPUSimulators/cuda/SWE2D_WAF.cu b/GPUSimulators/cuda/SWE2D_WAF.cu index 1b80e4d..7a56d8e 100644 --- a/GPUSimulators/cuda/SWE2D_WAF.cu +++ b/GPUSimulators/cuda/SWE2D_WAF.cu @@ -134,6 +134,7 @@ __global__ void WAFKernel( const unsigned int w = BLOCK_WIDTH; const unsigned int h = BLOCK_HEIGHT; const unsigned int gc = 2; + const unsigned int vars = 3; //Shared memory variables __shared__ float Q[3][h+4][w+4]; @@ -161,10 +162,7 @@ __global__ void WAFKernel( //Compute fluxes along the x axis and evolve computeFluxF(Q, F, g_, dx_, dt_); __syncthreads(); - - evolveF(Q[0], F[0], dx_, dt_); - evolveF(Q[1], F[1], dx_, dt_); - evolveF(Q[2], F[2], dx_, dt_); + evolveF(Q, F, dx_, dt_); __syncthreads(); //Fix boundary conditions @@ -176,10 +174,7 @@ __global__ void WAFKernel( //Compute fluxes along the y axis and evolve computeFluxG(Q, F, g_, dy_, dt_); __syncthreads(); - - evolveG(Q[0], F[0], dy_, dt_); - evolveG(Q[1], F[1], dy_, dt_); - evolveG(Q[2], F[2], dy_, dt_); + evolveG(Q, F, dy_, dt_); __syncthreads(); } //Step 1 => evolve y first, then x @@ -187,10 +182,7 @@ __global__ void WAFKernel( //Compute fluxes along the y axis and evolve computeFluxG(Q, F, g_, dy_, dt_); __syncthreads(); - - evolveG(Q[0], F[0], dy_, dt_); - evolveG(Q[1], F[1], dy_, dt_); - evolveG(Q[2], F[2], dy_, dt_); + evolveG(Q, F, dy_, dt_); __syncthreads(); //Fix boundary conditions @@ -202,10 +194,7 @@ __global__ void WAFKernel( //Compute fluxes along the x axis and evolve computeFluxF(Q, F, g_, dx_, dt_); __syncthreads(); - - evolveF(Q[0], F[0], dx_, dt_); - evolveF(Q[1], F[1], dx_, dt_); - evolveF(Q[2], F[2], dx_, dt_); + evolveF(Q, F, dx_, dt_); __syncthreads(); } diff --git a/GPUSimulators/cuda/common.h b/GPUSimulators/cuda/common.h index 9fe08ff..d0c53bc 100644 --- a/GPUSimulators/cuda/common.h +++ b/GPUSimulators/cuda/common.h @@ -245,9 +245,9 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2 -template -__device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells], - float F[block_height+1][block_width+1], +template +__device__ void evolveF(float Q[vars][block_height+2*ghost_cells][block_width+2*ghost_cells], + float F[vars][block_height+1][block_width+1], const float dx_, const float dt_) { //Index of thread within block const int tx = threadIdx.x; @@ -260,8 +260,9 @@ __device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_ //const int ti = blockDim.x*blockIdx.x + threadIdx.x + ghost_cells; //Skip global ghost cells, i.e., +1 //const int tj = blockDim.y*blockIdx.y + threadIdx.y + ghost_cells; //if (ti > ghost_cells-1 && ti < nx_+ghost_cells && tj > ghost_cells-1 && tj < ny_+ghost_cells) { - Q[j][i] = Q[j][i] + (F[ty][tx] - F[ty][tx+1]) * dt_ / dx_; - + for (int var=0; var < vars; ++var) { + Q[var][j][i] = Q[var][j][i] + (F[var][ty][tx] - F[var][ty][tx+1]) * dt_ / dx_; + } } @@ -272,9 +273,9 @@ __device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_ /** * Evolves the solution in time along the y axis (dimensional splitting) */ -template -__device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells], - float G[block_height+1][block_width+1], +template +__device__ void evolveG(float Q[vars][block_height+2*ghost_cells][block_width+2*ghost_cells], + float G[vars][block_height+1][block_width+1], const float dy_, const float dt_) { //Index of thread within block const int tx = threadIdx.x; @@ -283,7 +284,9 @@ __device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_ const int i = tx + ghost_cells; //Skip local ghost cells, i.e., +1 const int j = ty + ghost_cells; - Q[j][i] = Q[j][i] + (G[ty][tx] - G[ty+1][tx]) * dt_ / dy_; + for (int var=0; var < vars; ++var) { + Q[var][j][i] = Q[var][j][i] + (G[var][ty][tx] - G[var][ty+1][tx]) * dt_ / dy_; + } }