mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-11-29 17:28:03 +01:00
Implemented variable timestep
This commit is contained in:
@@ -97,7 +97,10 @@ __global__ void FORCEKernel(
|
||||
//Output h^{n+1}
|
||||
float* h1_ptr_, int h1_pitch_,
|
||||
float* hu1_ptr_, int hu1_pitch_,
|
||||
float* hv1_ptr_, int hv1_pitch_) {
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
@@ -130,6 +133,11 @@ __global__ void FORCEKernel(
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_);
|
||||
}
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -113,7 +113,10 @@ __global__ void HLLKernel(
|
||||
//Output h^{n+1}
|
||||
float* h1_ptr_, int h1_pitch_,
|
||||
float* hu1_ptr_, int hu1_pitch_,
|
||||
float* hv1_ptr_, int hv1_pitch_) {
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
@@ -148,6 +151,11 @@ __global__ void HLLKernel(
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_);
|
||||
}
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -141,7 +141,10 @@ __global__ void HLL2Kernel(
|
||||
//Output h^{n+1}
|
||||
float* h1_ptr_, int h1_pitch_,
|
||||
float* hu1_ptr_, int hu1_pitch_,
|
||||
float* hv1_ptr_, int hv1_pitch_) {
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
@@ -203,6 +206,11 @@ __global__ void HLL2Kernel(
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_);
|
||||
}
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -151,8 +151,10 @@ __global__ void KP07Kernel(
|
||||
//Output h^{n+1}
|
||||
float* h1_ptr_, int h1_pitch_,
|
||||
float* hu1_ptr_, int hu1_pitch_,
|
||||
float* hv1_ptr_, int hv1_pitch_) {
|
||||
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
const unsigned int gc_x = 2;
|
||||
@@ -222,5 +224,10 @@ __global__ void KP07Kernel(
|
||||
hv_row[ti] = hv1;
|
||||
}
|
||||
}
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
writeCfl<w, h, gc_x, gc_y, vars>(Q, Q[0], nx_, ny_, dx_, dy_, g_, cfl_);
|
||||
}
|
||||
}
|
||||
} //extern "C"
|
||||
@@ -138,7 +138,10 @@ __global__ void KP07DimsplitKernel(
|
||||
//Output h^{n+1}
|
||||
float* h1_ptr_, int h1_pitch_,
|
||||
float* hu1_ptr_, int hu1_pitch_,
|
||||
float* hv1_ptr_, int hv1_pitch_) {
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
const unsigned int gc_x = 2;
|
||||
@@ -194,6 +197,11 @@ __global__ void KP07DimsplitKernel(
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
writeCfl<w, h, gc_x, gc_y, vars>(Q, F[0], nx_, ny_, dx_, dy_, g_, cfl_);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -114,7 +114,10 @@ void LxFKernel(
|
||||
//Output h^{n+1}
|
||||
float* h1_ptr_, int h1_pitch_,
|
||||
float* hu1_ptr_, int hu1_pitch_,
|
||||
float* hv1_ptr_, int hv1_pitch_) {
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
@@ -154,6 +157,11 @@ void LxFKernel(
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
writeCfl<w, h, gc_x, gc_y, vars>(Q, Q[0], nx_, ny_, dx_, dy_, g_, cfl_);
|
||||
}
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
|
||||
@@ -468,3 +468,66 @@ __device__ float3 FORCE_1D_flux(const float3 Q_l, const float3 Q_r, const float
|
||||
const float3 F_lw2 = LxW2_1D_flux(Q_l, Q_r, g_, dx_, dt_);
|
||||
return 0.5f*(F_lf + F_lw2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//Computes a per-block time step bound from the CFL condition and stores it
//in output_.
//
//For every cell of this block that lies inside the domain the function
//evaluates
//    min( dx_ / (|u| + sqrt(g_*h)),  dy_ / (|v| + sqrt(g_*h)) )
//with h = Q[0], u = Q[1]/h and v = Q[2]/h, reduces these values to a single
//minimum for the block, and writes that minimum to
//    output_[gridDim.x*blockIdx.y + blockIdx.x].
//
//Template parameters:
//    w, h        Block width / height in cells (without ghost cells).
//    gc_x, gc_y  Number of ghost cells on each side in x / y.
//    vars        Number of conserved variables stored in Q (indices 0..2 are read).
//
//Parameters:
//    Q           Conserved variables for the block, including ghost cells.
//    shmem       Scratch buffer of one variable's size, used for the reduction.
//                It is read by other threads after __syncthreads(), so it must
//                be visible to the whole block. It MAY alias Q[0]: each thread
//                reads its own Q cell before overwriting that same cell only.
//                Its contents are clobbered by this call.
//    nx_, ny_    Number of internal cells in the domain in x / y.
//    dx_, dy_    Cell size in x / y.
//    g_          Gravitational constant.
//    output_     One float per block; must not be NULL.
//
//Must be called by every thread of the block (it contains __syncthreads()).
//NOTE(review): the reduction bounds use w and h, so this presumably expects
//blockDim == (w, h) -- confirm against the launch configuration.
//NOTE(review): no guard for h <= 0 (dry cells); the resulting NaN/inf values
//are passed straight to fminf -- confirm this is the intended handling.
template<int w, int h, int gc_x, int gc_y, int vars>
__device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x],
                         float shmem[h+2*gc_y][w+2*gc_x],
                         const int nx_, const int ny_,
                         const float dx_, const float dy_, const float g_,
                         float* output_) {
    //Index of thread within block, shifted past the ghost cells
    const int tx = threadIdx.x + gc_x;
    const int ty = threadIdx.y + gc_y;

    //Index of cell within domain (also includes the ghost cell offset)
    const int ti = blockDim.x*blockIdx.x + tx;
    const int tj = blockDim.y*blockIdx.y + ty;

    //Only internal cells take part in the reduction
    const bool in_domain = (ti < nx_+gc_x) && (tj < ny_+gc_y);

    //Step 1: every internal cell computes its own bound
    if (in_domain) {
        //Named "depth" so it does not redeclare the template parameter h
        const float depth = Q[0][ty][tx];
        const float u = Q[1][ty][tx] / depth;
        const float v = Q[2][ty][tx] / depth;

        //Gravity wave speed, shared by both directions
        const float c = sqrtf(g_*depth);

        const float max_u = dx_ / (fabsf(u) + c);
        const float max_v = dy_ / (fabsf(v) + c);

        shmem[ty][tx] = fminf(max_u, max_v);
    }
    __syncthreads();

    //Step 2: the first row of threads reduces each column into row gc_y
    if (in_domain && ty == gc_y) {
        float min_val = shmem[ty][tx];
        //Number of rows of this block that are inside the domain
        const int max_y = min(h, ny_+gc_y - tj);
        for (int j=gc_y+1; j<max_y+gc_y; ++j) {
            min_val = fminf(min_val, shmem[j][tx]);
        }
        shmem[ty][tx] = min_val;
    }
    __syncthreads();

    //Step 3: one thread reduces the first row to find the block minimum
    if (tx == gc_x && ty == gc_y) {
        float min_val = shmem[ty][tx];
        //Number of columns of this block that are inside the domain
        const int max_x = min(w, nx_+gc_x - ti);
        for (int i=gc_x+1; i<max_x+gc_x; ++i) {
            min_val = fminf(min_val, shmem[ty][i]);
        }

        //One output element per block, row-major over the grid
        const int idx = gridDim.x*blockIdx.y + blockIdx.x;
        output_[idx] = min_val;
    }
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user