Euler appears to work now

This commit is contained in:
André R. Brodtkorb
2018-11-05 16:46:37 +01:00
parent 0671bd747a
commit e38885d39b
13 changed files with 702 additions and 19 deletions

View File

@@ -154,10 +154,10 @@ __global__ void KP07DimsplitKernel(
//Read into shared memory
readBlock<w, h, gc>( rho0_ptr_, rho0_pitch_, Q[0], nx_+4, ny_+4);
readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_+4, ny_+4);
readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_+4, ny_+4);
readBlock<w, h, gc>( E0_ptr_, E0_pitch_, Q[3], nx_+4, ny_+4);
readBlock<w, h, gc>( rho0_ptr_, rho0_pitch_, Q[0], nx_, ny_);
readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_);
readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_);
readBlock<w, h, gc>( E0_ptr_, E0_pitch_, Q[3], nx_, ny_);
__syncthreads();
//Fix boundary conditions
@@ -226,6 +226,26 @@ __global__ void KP07DimsplitKernel(
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
__syncthreads();
//This is the RK2-part
const int tx = threadIdx.x + gc;
const int ty = threadIdx.y + gc;
const float q1 = Q[0][ty][tx];
const float q2 = Q[1][ty][tx];
const float q3 = Q[2][ty][tx];
const float q4 = Q[3][ty][tx];
__syncthreads();
readBlock<w, h, gc>( rho1_ptr_, rho1_pitch_, Q[0], nx_, ny_);
readBlock<w, h, gc>(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_);
readBlock<w, h, gc>(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_);
readBlock<w, h, gc>( E1_ptr_, E1_pitch_, Q[3], nx_, ny_);
__syncthreads();
Q[0][ty][tx] = 0.5f*( Q[0][ty][tx] + q1 );
Q[1][ty][tx] = 0.5f*( Q[1][ty][tx] + q2 );
Q[2][ty][tx] = 0.5f*( Q[2][ty][tx] + q3 );
Q[3][ty][tx] = 0.5f*( Q[3][ty][tx] + q4 );
}

View File

@@ -92,19 +92,29 @@ __device__ float desingularize(float x_, float eps_) {
template<int block_width, int block_height, int ghost_cells>
inline __device__ void readBlock(float* ptr_, int pitch_,
float shmem[block_height+2*ghost_cells][block_width+2*ghost_cells],
const int max_x_, const int max_y_) {
const int nx_, const int ny_) {
//Index of block within domain
const int bx = blockDim.x * blockIdx.x;
const int by = blockDim.y * blockIdx.y;
const int gc_pad = 4;
//Read into shared memory
//Loop over all rows of the shared memory tile (including ghost cells)
for (int j=threadIdx.y; j<block_height+2*ghost_cells; j+=block_height) {
const int l = min(by + j, max_y_-1);
//const int l = min(by + j, ny_+2*ghost_cells-1);
const int y = by + j;
const int y_offset = ( (int) (y < gc_pad) - (int) (y >= ny_+2*ghost_cells-gc_pad) ) * (ny_+2*ghost_cells - 2*gc_pad);
const int l = y + y_offset;
float* row = (float*) ((char*) ptr_ + pitch_*l);
for (int i=threadIdx.x; i<block_width+2*ghost_cells; i+=block_width) {
const int k = min(bx + i, max_x_-1);
//const int k = min(bx + i, nx_+2*ghost_cells-1);
const int x = bx + i;
const int gc_pad = 4;
const int x_offset = ( (int) (x < gc_pad) - (int) (x >= nx_+2*ghost_cells-gc_pad) ) * (nx_+2*ghost_cells - 2*gc_pad);
const int k = x + x_offset;
shmem[j][i] = row[k];
}
@@ -248,6 +258,16 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
template<int block_width, int block_height, int ghost_cells, int vars>