Refactoring: evolveF/evolveG now take a `vars` template parameter and update all variables in a single call

This commit is contained in:
André R. Brodtkorb 2018-10-31 15:49:10 +01:00
parent 2d8858e7e6
commit 064027fc0b
7 changed files with 34 additions and 69 deletions

View File

@ -117,6 +117,7 @@ __global__ void FORCEKernel(
const unsigned int w = BLOCK_WIDTH; const unsigned int w = BLOCK_WIDTH;
const unsigned int h = BLOCK_HEIGHT; const unsigned int h = BLOCK_HEIGHT;
const unsigned int gc = 1; const unsigned int gc = 1;
const unsigned int vars = 3;
__shared__ float Q[3][h+2][w+2]; __shared__ float Q[3][h+2][w+2];
__shared__ float F[3][h+1][w+1]; __shared__ float F[3][h+1][w+1];
@ -136,10 +137,7 @@ __global__ void FORCEKernel(
//Compute flux along x, and evolve //Compute flux along x, and evolve
computeFluxF(Q, F, g_, dx_, dt_); computeFluxF(Q, F, g_, dx_, dt_);
__syncthreads(); __syncthreads();
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
__syncthreads(); __syncthreads();
//Set boundary conditions //Set boundary conditions
@ -151,10 +149,7 @@ __global__ void FORCEKernel(
//Compute flux along y, and evolve //Compute flux along y, and evolve
computeFluxG(Q, F, g_, dy_, dt_); computeFluxG(Q, F, g_, dy_, dt_);
__syncthreads(); __syncthreads();
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
__syncthreads(); __syncthreads();
//Write to main memory //Write to main memory

View File

@ -124,6 +124,7 @@ __global__ void HLLKernel(
const unsigned int w = BLOCK_WIDTH; const unsigned int w = BLOCK_WIDTH;
const unsigned int h = BLOCK_HEIGHT; const unsigned int h = BLOCK_HEIGHT;
const unsigned int gc = 1; const unsigned int gc = 1;
const unsigned int vars = 3;
//Shared memory variables //Shared memory variables
__shared__ float Q[3][h+2][w+2]; __shared__ float Q[3][h+2][w+2];
@ -145,9 +146,7 @@ __global__ void HLLKernel(
computeFluxF(Q, F, g_); computeFluxF(Q, F, g_);
__syncthreads(); __syncthreads();
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_); evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
__syncthreads(); __syncthreads();
//Set boundary conditions //Set boundary conditions
@ -160,9 +159,7 @@ __global__ void HLLKernel(
computeFluxG(Q, F, g_); computeFluxG(Q, F, g_);
__syncthreads(); __syncthreads();
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_); evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
__syncthreads(); __syncthreads();
// Write to main memory for all internal cells // Write to main memory for all internal cells

View File

@ -159,6 +159,7 @@ __global__ void HLL2Kernel(
const unsigned int w = BLOCK_WIDTH; const unsigned int w = BLOCK_WIDTH;
const unsigned int h = BLOCK_HEIGHT; const unsigned int h = BLOCK_HEIGHT;
const unsigned int gc = 2; const unsigned int gc = 2;
const unsigned int vars = 3;
//Shared memory variables //Shared memory variables
__shared__ float Q[3][h+4][w+4]; __shared__ float Q[3][h+4][w+4];
@ -184,10 +185,7 @@ __global__ void HLL2Kernel(
__syncthreads(); __syncthreads();
computeFluxF(Q, Qx, F, g_, dx_, dt_); computeFluxF(Q, Qx, F, g_, dx_, dt_);
__syncthreads(); __syncthreads();
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
__syncthreads(); __syncthreads();
//Set boundary conditions //Set boundary conditions
@ -201,10 +199,7 @@ __global__ void HLL2Kernel(
__syncthreads(); __syncthreads();
computeFluxG(Q, Qx, F, g_, dy_, dt_); computeFluxG(Q, Qx, F, g_, dy_, dt_);
__syncthreads(); __syncthreads();
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
__syncthreads(); __syncthreads();
} }
//Step 1 => evolve y first, then x //Step 1 => evolve y first, then x
@ -214,10 +209,7 @@ __global__ void HLL2Kernel(
__syncthreads(); __syncthreads();
computeFluxG(Q, Qx, F, g_, dy_, dt_); computeFluxG(Q, Qx, F, g_, dy_, dt_);
__syncthreads(); __syncthreads();
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
__syncthreads(); __syncthreads();
//Set boundary conditions //Set boundary conditions
@ -231,10 +223,7 @@ __global__ void HLL2Kernel(
__syncthreads(); __syncthreads();
computeFluxF(Q, Qx, F, g_, dx_, dt_); computeFluxF(Q, Qx, F, g_, dx_, dt_);
__syncthreads(); __syncthreads();
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
__syncthreads(); __syncthreads();
} }

View File

@ -122,6 +122,7 @@ __global__ void KP07Kernel(
const unsigned int w = BLOCK_WIDTH; const unsigned int w = BLOCK_WIDTH;
const unsigned int h = BLOCK_HEIGHT; const unsigned int h = BLOCK_HEIGHT;
const unsigned int gc = 2; const unsigned int gc = 2;
const unsigned int vars = 3;
//Index of thread within block //Index of thread within block
const int tx = threadIdx.x; const int tx = threadIdx.x;

View File

@ -150,6 +150,7 @@ __global__ void KP07DimsplitKernel(
const unsigned int w = BLOCK_WIDTH; const unsigned int w = BLOCK_WIDTH;
const unsigned int h = BLOCK_HEIGHT; const unsigned int h = BLOCK_HEIGHT;
const unsigned int gc = 2; const unsigned int gc = 2;
const unsigned int vars = 3;
//Shared memory variables //Shared memory variables
@ -181,9 +182,7 @@ __global__ void KP07DimsplitKernel(
__syncthreads(); __syncthreads();
computeFluxF(Q, Qx, F, g_, dx_, dt_); computeFluxF(Q, Qx, F, g_, dx_, dt_);
__syncthreads(); __syncthreads();
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_); evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
__syncthreads(); __syncthreads();
//Set boundary conditions //Set boundary conditions
@ -200,10 +199,7 @@ __global__ void KP07DimsplitKernel(
computeFluxG(Q, Qx, F, g_, dy_, dt_); computeFluxG(Q, Qx, F, g_, dy_, dt_);
__syncthreads(); __syncthreads();
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
__syncthreads(); __syncthreads();
} }
//Step 1 => evolve y first, then x //Step 1 => evolve y first, then x
@ -213,10 +209,7 @@ __global__ void KP07DimsplitKernel(
__syncthreads(); __syncthreads();
computeFluxG(Q, Qx, F, g_, dy_, dt_); computeFluxG(Q, Qx, F, g_, dy_, dt_);
__syncthreads(); __syncthreads();
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
__syncthreads(); __syncthreads();
//Set boundary conditions //Set boundary conditions
@ -230,9 +223,7 @@ __global__ void KP07DimsplitKernel(
__syncthreads(); __syncthreads();
computeFluxF(Q, Qx, F, g_, dx_, dt_); computeFluxF(Q, Qx, F, g_, dx_, dt_);
__syncthreads(); __syncthreads();
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_); evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
__syncthreads(); __syncthreads();
} }

View File

@ -134,6 +134,7 @@ __global__ void WAFKernel(
const unsigned int w = BLOCK_WIDTH; const unsigned int w = BLOCK_WIDTH;
const unsigned int h = BLOCK_HEIGHT; const unsigned int h = BLOCK_HEIGHT;
const unsigned int gc = 2; const unsigned int gc = 2;
const unsigned int vars = 3;
//Shared memory variables //Shared memory variables
__shared__ float Q[3][h+4][w+4]; __shared__ float Q[3][h+4][w+4];
@ -161,10 +162,7 @@ __global__ void WAFKernel(
//Compute fluxes along the x axis and evolve //Compute fluxes along the x axis and evolve
computeFluxF(Q, F, g_, dx_, dt_); computeFluxF(Q, F, g_, dx_, dt_);
__syncthreads(); __syncthreads();
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
__syncthreads(); __syncthreads();
//Fix boundary conditions //Fix boundary conditions
@ -176,10 +174,7 @@ __global__ void WAFKernel(
//Compute fluxes along the y axis and evolve //Compute fluxes along the y axis and evolve
computeFluxG(Q, F, g_, dy_, dt_); computeFluxG(Q, F, g_, dy_, dt_);
__syncthreads(); __syncthreads();
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
__syncthreads(); __syncthreads();
} }
//Step 1 => evolve y first, then x //Step 1 => evolve y first, then x
@ -187,10 +182,7 @@ __global__ void WAFKernel(
//Compute fluxes along the y axis and evolve //Compute fluxes along the y axis and evolve
computeFluxG(Q, F, g_, dy_, dt_); computeFluxG(Q, F, g_, dy_, dt_);
__syncthreads(); __syncthreads();
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
__syncthreads(); __syncthreads();
//Fix boundary conditions //Fix boundary conditions
@ -202,10 +194,7 @@ __global__ void WAFKernel(
//Compute fluxes along the x axis and evolve //Compute fluxes along the x axis and evolve
computeFluxF(Q, F, g_, dx_, dt_); computeFluxF(Q, F, g_, dx_, dt_);
__syncthreads(); __syncthreads();
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
__syncthreads(); __syncthreads();
} }

View File

@ -245,9 +245,9 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
template<int block_width, int block_height, int ghost_cells> template<int block_width, int block_height, int ghost_cells, int vars>
__device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells], __device__ void evolveF(float Q[vars][block_height+2*ghost_cells][block_width+2*ghost_cells],
float F[block_height+1][block_width+1], float F[vars][block_height+1][block_width+1],
const float dx_, const float dt_) { const float dx_, const float dt_) {
//Index of thread within block //Index of thread within block
const int tx = threadIdx.x; const int tx = threadIdx.x;
@ -260,8 +260,9 @@ __device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_
//const int ti = blockDim.x*blockIdx.x + threadIdx.x + ghost_cells; //Skip global ghost cells, i.e., +1 //const int ti = blockDim.x*blockIdx.x + threadIdx.x + ghost_cells; //Skip global ghost cells, i.e., +1
//const int tj = blockDim.y*blockIdx.y + threadIdx.y + ghost_cells; //const int tj = blockDim.y*blockIdx.y + threadIdx.y + ghost_cells;
//if (ti > ghost_cells-1 && ti < nx_+ghost_cells && tj > ghost_cells-1 && tj < ny_+ghost_cells) { //if (ti > ghost_cells-1 && ti < nx_+ghost_cells && tj > ghost_cells-1 && tj < ny_+ghost_cells) {
Q[j][i] = Q[j][i] + (F[ty][tx] - F[ty][tx+1]) * dt_ / dx_; for (int var=0; var < vars; ++var) {
Q[var][j][i] = Q[var][j][i] + (F[var][ty][tx] - F[var][ty][tx+1]) * dt_ / dx_;
}
} }
@ -272,9 +273,9 @@ __device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_
/** /**
* Evolves the solution in time along the y axis (dimensional splitting) * Evolves the solution in time along the y axis (dimensional splitting)
*/ */
template<int block_width, int block_height, int ghost_cells> template<int block_width, int block_height, int ghost_cells, int vars>
__device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells], __device__ void evolveG(float Q[vars][block_height+2*ghost_cells][block_width+2*ghost_cells],
float G[block_height+1][block_width+1], float G[vars][block_height+1][block_width+1],
const float dy_, const float dt_) { const float dy_, const float dt_) {
//Index of thread within block //Index of thread within block
const int tx = threadIdx.x; const int tx = threadIdx.x;
@ -283,7 +284,9 @@ __device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_
const int i = tx + ghost_cells; //Skip local ghost cells, i.e., +1 const int i = tx + ghost_cells; //Skip local ghost cells, i.e., +1
const int j = ty + ghost_cells; const int j = ty + ghost_cells;
Q[j][i] = Q[j][i] + (G[ty][tx] - G[ty+1][tx]) * dt_ / dy_; for (int var=0; var < vars; ++var) {
Q[var][j][i] = Q[var][j][i] + (G[var][ty][tx] - G[var][ty+1][tx]) * dt_ / dy_;
}
} }