Refactoring

This commit is contained in:
André R. Brodtkorb
2018-10-31 15:34:54 +01:00
parent 4fa09d5d39
commit 2d8858e7e6
19 changed files with 402 additions and 158 deletions

View File

@@ -44,6 +44,28 @@ inline __device__ float3 operator+(const float3 a, const float3 b) {
return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
}
/**
* Float4 operators
*/
/**
 * Scalar-times-vector product for float4 (scalar on the left-hand side).
 *
 * @param a Scalar factor
 * @param b Vector whose four components are scaled
 * @return  (a*b.x, a*b.y, a*b.z, a*b.w)
 */
inline __device__ float4 operator*(const float a, const float4 b) {
    float4 scaled;
    scaled.x = a*b.x;
    scaled.y = a*b.y;
    scaled.z = a*b.z;
    scaled.w = a*b.w;
    return scaled;
}
/**
 * Divides every component of a float4 by the same scalar.
 *
 * Each component is divided individually (no shared reciprocal), so the
 * result is exactly the per-component quotient.
 *
 * @param a Vector to divide
 * @param b Scalar divisor
 * @return  (a.x/b, a.y/b, a.z/b, a.w/b)
 */
inline __device__ float4 operator/(const float4 a, const float b) {
    float4 quotient;
    quotient.x = a.x/b;
    quotient.y = a.y/b;
    quotient.z = a.z/b;
    quotient.w = a.w/b;
    return quotient;
}
/**
 * Component-wise difference of two float4 values.
 *
 * @param a Minuend
 * @param b Subtrahend
 * @return  (a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w)
 */
inline __device__ float4 operator-(const float4 a, const float4 b) {
    const float dx = a.x-b.x;
    const float dy = a.y-b.y;
    const float dz = a.z-b.z;
    const float dw = a.w-b.w;
    return make_float4(dx, dy, dz, dw);
}
/**
 * Component-wise sum of two float4 values.
 *
 * @param a First summand
 * @param b Second summand
 * @return  (a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w)
 */
inline __device__ float4 operator+(const float4 a, const float4 b) {
    const float sx = a.x+b.x;
    const float sy = a.y+b.y;
    const float sz = a.z+b.z;
    const float sw = a.w+b.w;
    return make_float4(sx, sy, sz, sw);
}
/**
 * Restricts f to the interval [a, b].
 *
 * The upper bound is applied first and the lower bound second, so if the
 * bounds are passed in the wrong order (a > b) the lower bound a is returned.
 *
 * @param f Value to clamp
 * @param a Lower bound
 * @param b Upper bound
 * @return  fmaxf(a, fminf(f, b))
 */
inline __device__ __host__ float clamp(const float f, const float a, const float b) {
    //Cap from above first...
    const float capped = fminf(f, b);
    //...then raise to the lower bound
    return fmaxf(a, capped);
}
@@ -223,62 +245,23 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
/**
* Evolves the solution in time along the x axis (dimensional splitting)
*
* NOTE(review): this span appears to interleave two versions of the same
* routine (it reads like a commit diff whose add/remove markers were lost);
* confirm against the real file before relying on it:
*   - evolveF1: non-templated, Q and F carry a leading dimension of 3, local
*     offset of one cell, update guarded by a range check on (ti, tj).
*   - evolveF:  templated on block_width, block_height and ghost_cells, Q and F
*     are two-dimensional, update performed without a range check.
* Both versions update the cell value in place as
*   Q = Q + (F[ty][tx] - F[ty][tx+1]) * dt_ / dx_
* i.e. the difference between the flux stored at tx and the one stored at tx+1,
* scaled by dt_ and divided by dx_.
*/
//Signature of the non-templated variant (three components, BLOCK_WIDTH/BLOCK_HEIGHT sizes)
__device__ void evolveF1(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
const int nx_, const int ny_,
//Signature of the templated variant (single component, sizes from template parameters)
template<int block_width, int block_height, int ghost_cells>
__device__ void evolveF(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells],
float F[block_height+1][block_width+1],
const float dx_, const float dt_) {
//Index of thread within block
const int tx = threadIdx.x;
const int ty = threadIdx.y;
//Index into Q: thread index shifted by ghost_cells
const int i = tx + ghost_cells; //Skip local ghost cells
const int j = ty + ghost_cells;
//Index of cell within domain
const int ti = blockDim.x*blockIdx.x + threadIdx.x + 1; //Skip global ghost cells, i.e., +1
const int tj = blockDim.y*blockIdx.y + threadIdx.y + 1;
//Disabled: global index and range check expressed in terms of ghost_cells
//const int ti = blockDim.x*blockIdx.x + threadIdx.x + ghost_cells; //Skip global ghost cells, i.e., +1
//const int tj = blockDim.y*blockIdx.y + threadIdx.y + ghost_cells;
//if (ti > ghost_cells-1 && ti < nx_+ghost_cells && tj > ghost_cells-1 && tj < ny_+ghost_cells) {
//Single-component update (templated variant), executed unconditionally
Q[j][i] = Q[j][i] + (F[ty][tx] - F[ty][tx+1]) * dt_ / dx_;
//Three-component update (non-templated variant), only for 1 <= ti <= nx_ and 1 <= tj <= ny_
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1) {
const int i = tx + 1; //Skip local ghost cells, i.e., +1
const int j = ty + 1;
Q[0][j][i] = Q[0][j][i] + (F[0][ty][tx] - F[0][ty][tx+1]) * dt_ / dx_;
Q[1][j][i] = Q[1][j][i] + (F[1][ty][tx] - F[1][ty][tx+1]) * dt_ / dx_;
Q[2][j][i] = Q[2][j][i] + (F[2][ty][tx] - F[2][ty][tx+1]) * dt_ / dx_;
}
}
/**
 * Evolves the solution in time along the x axis (dimensional splitting),
 * variant whose Q array is padded by two cells on every side.
 *
 * Each thread updates the three components of the cell at Q[c][ty+2][tx+2]
 * in place by adding (F[c][ty][tx] - F[c][ty][tx+1]) * dt_ / dx_.
 * Threads whose global cell index falls outside 2 <= ti <= nx_+1 or
 * 2 <= tj <= ny_+1 leave Q untouched.
 *
 * @param Q   Three-component cell values, updated in place
 * @param F   Three-component fluxes; entries [ty][tx] and [ty][tx+1] are read
 * @param nx_ Upper limit used in the range check along x
 * @param ny_ Upper limit used in the range check along y
 * @param dx_ Divisor applied to the flux difference
 * @param dt_ Factor applied to the flux difference
 */
__device__ void evolveF2(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
        float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
        const int nx_, const int ny_,
        const float dx_, const float dt_) {
    //Index of cell within domain, shifted by +2
    const int global_x = blockDim.x*blockIdx.x + threadIdx.x + 2;
    const int global_y = blockDim.y*blockIdx.y + threadIdx.y + 2;

    //Nothing to do for threads outside the checked range
    if (global_x <= 1 || global_x >= nx_+2 || global_y <= 1 || global_y >= ny_+2) {
        return;
    }

    //Index of thread within block
    const int col = threadIdx.x;
    const int row = threadIdx.y;

    //Index into Q, shifted by +2 relative to the thread index
    const int i = col + 2;
    const int j = row + 2;

    //Same update for each of the three components
    for (int c = 0; c < 3; ++c) {
        Q[c][j][i] = Q[c][j][i] + (F[c][row][col] - F[c][row][col+1]) * dt_ / dx_;
    }
}
@@ -289,57 +272,18 @@ __device__ void evolveF2(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
/**
* Evolves the solution in time along the y axis (dimensional splitting)
*
* NOTE(review): this span appears to interleave two versions of the same
* routine (it reads like a commit diff whose add/remove markers were lost);
* confirm against the real file before relying on it:
*   - evolveG1: non-templated, Q and G carry a leading dimension of 3, local
*     offset of one cell, update guarded by a range check on (ti, tj).
*   - evolveG:  templated on block_width, block_height and ghost_cells, Q and G
*     are two-dimensional.
* The update visible in this span is, per component,
*   Q = Q + (G[ty][tx] - G[ty+1][tx]) * dt_ / dy_
* i.e. the difference between the flux stored at ty and the one stored at ty+1,
* scaled by dt_ and divided by dy_.
*/
//Signature of the non-templated variant (three components, BLOCK_WIDTH/BLOCK_HEIGHT sizes)
__device__ void evolveG1(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
const int nx_, const int ny_,
//Signature of the templated variant (single component, sizes from template parameters)
template<int block_width, int block_height, int ghost_cells>
__device__ void evolveG(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells],
float G[block_height+1][block_width+1],
const float dy_, const float dt_) {
//Index of thread within block
const int tx = threadIdx.x;
const int ty = threadIdx.y;
//Index of cell within domain
const int ti = blockDim.x*blockIdx.x + threadIdx.x + 1; //Skip global ghost cells, i.e., +1
const int tj = blockDim.y*blockIdx.y + threadIdx.y + 1;
//Index into Q: thread index shifted by ghost_cells
const int i = tx + ghost_cells; //Skip local ghost cells
const int j = ty + ghost_cells;
//Three-component update (non-templated variant), only for 1 <= ti <= nx_ and 1 <= tj <= ny_
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1) {
const int i = tx + 1; //Skip local ghost cells, i.e., +1
const int j = ty + 1;
Q[0][j][i] = Q[0][j][i] + (G[0][ty][tx] - G[0][ty+1][tx]) * dt_ / dy_;
Q[1][j][i] = Q[1][j][i] + (G[1][ty][tx] - G[1][ty+1][tx]) * dt_ / dy_;
Q[2][j][i] = Q[2][j][i] + (G[2][ty][tx] - G[2][ty+1][tx]) * dt_ / dy_;
}
}
/**
* Evolves the solution in time along the y axis (dimensional splitting)
*
* Variant whose Q array is padded by two cells on every side. Each thread
* updates the three components of Q[c][ty+2][tx+2] in place by adding
* (G[c][ty][tx] - G[c][ty+1][tx]) * dt_ / dy_, but only when its global cell
* index satisfies 2 <= ti <= nx_+1 and 2 <= tj <= ny_+1.
*
* NOTE(review): the last statement before the closing brace uses two-dimensional
* Q and G and refers to i and j outside the scope in which they are declared
* here; it appears to belong to the templated evolveG rather than to this
* function - confirm against the real file.
*/
__device__ void evolveG2(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
const int nx_, const int ny_,
const float dy_, const float dt_) {
//Index of thread within block
const int tx = threadIdx.x;
const int ty = threadIdx.y;
//Index of cell within domain
const int ti = blockDim.x*blockIdx.x + threadIdx.x + 2; //Skip global ghost cells, i.e., +2
const int tj = blockDim.y*blockIdx.y + threadIdx.y + 2;
//Only threads inside the checked range update Q
if (ti > 1 && ti < nx_+2 && tj > 1 && tj < ny_+2) {
const int i = tx + 2; //Skip local ghost cells, i.e., +2
const int j = ty + 2;
Q[0][j][i] = Q[0][j][i] + (G[0][ty][tx] - G[0][ty+1][tx]) * dt_ / dy_;
Q[1][j][i] = Q[1][j][i] + (G[1][ty][tx] - G[1][ty+1][tx]) * dt_ / dy_;
Q[2][j][i] = Q[2][j][i] + (G[2][ty][tx] - G[2][ty+1][tx]) * dt_ / dy_;
}
//NOTE(review): single-component update, presumably from the templated evolveG - verify
Q[j][i] = Q[j][i] + (G[ty][tx] - G[ty+1][tx]) * dt_ / dy_;
}