mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
Refactoring
This commit is contained in:
parent
2d8858e7e6
commit
064027fc0b
@ -117,6 +117,7 @@ __global__ void FORCEKernel(
|
|||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
const unsigned int gc = 1;
|
const unsigned int gc = 1;
|
||||||
|
const unsigned int vars = 3;
|
||||||
|
|
||||||
__shared__ float Q[3][h+2][w+2];
|
__shared__ float Q[3][h+2][w+2];
|
||||||
__shared__ float F[3][h+1][w+1];
|
__shared__ float F[3][h+1][w+1];
|
||||||
@ -136,10 +137,7 @@ __global__ void FORCEKernel(
|
|||||||
//Compute flux along x, and evolve
|
//Compute flux along x, and evolve
|
||||||
computeFluxF(Q, F, g_, dx_, dt_);
|
computeFluxF(Q, F, g_, dx_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
@ -151,10 +149,7 @@ __global__ void FORCEKernel(
|
|||||||
//Compute flux along y, and evolve
|
//Compute flux along y, and evolve
|
||||||
computeFluxG(Q, F, g_, dy_, dt_);
|
computeFluxG(Q, F, g_, dy_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
|
||||||
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Write to main memory
|
//Write to main memory
|
||||||
|
@ -124,6 +124,7 @@ __global__ void HLLKernel(
|
|||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
const unsigned int gc = 1;
|
const unsigned int gc = 1;
|
||||||
|
const unsigned int vars = 3;
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__shared__ float Q[3][h+2][w+2];
|
__shared__ float Q[3][h+2][w+2];
|
||||||
@ -145,9 +146,7 @@ __global__ void HLLKernel(
|
|||||||
computeFluxF(Q, F, g_);
|
computeFluxF(Q, F, g_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
@ -160,9 +159,7 @@ __global__ void HLLKernel(
|
|||||||
computeFluxG(Q, F, g_);
|
computeFluxG(Q, F, g_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
|
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
|
||||||
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
// Write to main memory for all internal cells
|
// Write to main memory for all internal cells
|
||||||
|
@ -159,6 +159,7 @@ __global__ void HLL2Kernel(
|
|||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
const unsigned int gc = 2;
|
const unsigned int gc = 2;
|
||||||
|
const unsigned int vars = 3;
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__shared__ float Q[3][h+4][w+4];
|
__shared__ float Q[3][h+4][w+4];
|
||||||
@ -184,10 +185,7 @@ __global__ void HLL2Kernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
@ -201,10 +199,7 @@ __global__ void HLL2Kernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
|
||||||
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
//Step 1 => evolve y first, then x
|
//Step 1 => evolve y first, then x
|
||||||
@ -214,10 +209,7 @@ __global__ void HLL2Kernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
|
||||||
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
@ -231,10 +223,7 @@ __global__ void HLL2Kernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -122,6 +122,7 @@ __global__ void KP07Kernel(
|
|||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
const unsigned int gc = 2;
|
const unsigned int gc = 2;
|
||||||
|
const unsigned int vars = 3;
|
||||||
|
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = threadIdx.x;
|
const int tx = threadIdx.x;
|
||||||
|
@ -150,6 +150,7 @@ __global__ void KP07DimsplitKernel(
|
|||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
const unsigned int gc = 2;
|
const unsigned int gc = 2;
|
||||||
|
const unsigned int vars = 3;
|
||||||
|
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
@ -181,9 +182,7 @@ __global__ void KP07DimsplitKernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
@ -200,10 +199,7 @@ __global__ void KP07DimsplitKernel(
|
|||||||
|
|
||||||
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
|
||||||
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
//Step 1 => evolve y first, then x
|
//Step 1 => evolve y first, then x
|
||||||
@ -213,10 +209,7 @@ __global__ void KP07DimsplitKernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
|
||||||
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
@ -230,9 +223,7 @@ __global__ void KP07DimsplitKernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -134,6 +134,7 @@ __global__ void WAFKernel(
|
|||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
const unsigned int gc = 2;
|
const unsigned int gc = 2;
|
||||||
|
const unsigned int vars = 3;
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__shared__ float Q[3][h+4][w+4];
|
__shared__ float Q[3][h+4][w+4];
|
||||||
@ -161,10 +162,7 @@ __global__ void WAFKernel(
|
|||||||
//Compute fluxes along the x axis and evolve
|
//Compute fluxes along the x axis and evolve
|
||||||
computeFluxF(Q, F, g_, dx_, dt_);
|
computeFluxF(Q, F, g_, dx_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Fix boundary conditions
|
//Fix boundary conditions
|
||||||
@ -176,10 +174,7 @@ __global__ void WAFKernel(
|
|||||||
//Compute fluxes along the y axis and evolve
|
//Compute fluxes along the y axis and evolve
|
||||||
computeFluxG(Q, F, g_, dy_, dt_);
|
computeFluxG(Q, F, g_, dy_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
|
||||||
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
//Step 1 => evolve y first, then x
|
//Step 1 => evolve y first, then x
|
||||||
@ -187,10 +182,7 @@ __global__ void WAFKernel(
|
|||||||
//Compute fluxes along the y axis and evolve
|
//Compute fluxes along the y axis and evolve
|
||||||
computeFluxG(Q, F, g_, dy_, dt_);
|
computeFluxG(Q, F, g_, dy_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
|
||||||
evolveG<w, h, gc>(Q[0], F[0], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[1], F[1], dy_, dt_);
|
|
||||||
evolveG<w, h, gc>(Q[2], F[2], dy_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Fix boundary conditions
|
//Fix boundary conditions
|
||||||
@ -202,10 +194,7 @@ __global__ void WAFKernel(
|
|||||||
//Compute fluxes along the x axis and evolve
|
//Compute fluxes along the x axis and evolve
|
||||||
computeFluxF(Q, F, g_, dx_, dt_);
|
computeFluxF(Q, F, g_, dx_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
evolveF<w, h, gc>(Q[0], F[0], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[1], F[1], dx_, dt_);
|
|
||||||
evolveF<w, h, gc>(Q[2], F[2], dx_, dt_);
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -245,9 +245,9 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<int block_width, int block_height, int ghost_cells>
|
template<int block_width, int block_height, int ghost_cells, int vars>
|
||||||
__device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells],
|
__device__ void evolveF(float Q[vars][block_height+2*ghost_cells][block_width+2*ghost_cells],
|
||||||
float F[block_height+1][block_width+1],
|
float F[vars][block_height+1][block_width+1],
|
||||||
const float dx_, const float dt_) {
|
const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = threadIdx.x;
|
const int tx = threadIdx.x;
|
||||||
@ -260,8 +260,9 @@ __device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_
|
|||||||
//const int ti = blockDim.x*blockIdx.x + threadIdx.x + ghost_cells; //Skip global ghost cells, i.e., +1
|
//const int ti = blockDim.x*blockIdx.x + threadIdx.x + ghost_cells; //Skip global ghost cells, i.e., +1
|
||||||
//const int tj = blockDim.y*blockIdx.y + threadIdx.y + ghost_cells;
|
//const int tj = blockDim.y*blockIdx.y + threadIdx.y + ghost_cells;
|
||||||
//if (ti > ghost_cells-1 && ti < nx_+ghost_cells && tj > ghost_cells-1 && tj < ny_+ghost_cells) {
|
//if (ti > ghost_cells-1 && ti < nx_+ghost_cells && tj > ghost_cells-1 && tj < ny_+ghost_cells) {
|
||||||
Q[j][i] = Q[j][i] + (F[ty][tx] - F[ty][tx+1]) * dt_ / dx_;
|
for (int var=0; var < vars; ++var) {
|
||||||
|
Q[var][j][i] = Q[var][j][i] + (F[var][ty][tx] - F[var][ty][tx+1]) * dt_ / dx_;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -272,9 +273,9 @@ __device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the y axis (dimensional splitting)
|
* Evolves the solution in time along the y axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
template<int block_width, int block_height, int ghost_cells>
|
template<int block_width, int block_height, int ghost_cells, int vars>
|
||||||
__device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells],
|
__device__ void evolveG(float Q[vars][block_height+2*ghost_cells][block_width+2*ghost_cells],
|
||||||
float G[block_height+1][block_width+1],
|
float G[vars][block_height+1][block_width+1],
|
||||||
const float dy_, const float dt_) {
|
const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = threadIdx.x;
|
const int tx = threadIdx.x;
|
||||||
@ -283,7 +284,9 @@ __device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_
|
|||||||
const int i = tx + ghost_cells; //Skip local ghost cells, i.e., +1
|
const int i = tx + ghost_cells; //Skip local ghost cells, i.e., +1
|
||||||
const int j = ty + ghost_cells;
|
const int j = ty + ghost_cells;
|
||||||
|
|
||||||
Q[j][i] = Q[j][i] + (G[ty][tx] - G[ty+1][tx]) * dt_ / dy_;
|
for (int var=0; var < vars; ++var) {
|
||||||
|
Q[var][j][i] = Q[var][j][i] + (G[var][ty][tx] - G[var][ty+1][tx]) * dt_ / dy_;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user