mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-11-29 17:28:03 +01:00
feat(GPUSimulator): add the extra variables introduced in SWWECommon for all other algorithms
This commit is contained in:
@@ -100,8 +100,18 @@ __global__ void FORCEKernel(
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
|
||||
float* cfl_,
|
||||
|
||||
//Subarea of internal domain to compute
|
||||
int x0=0, int y0=0,
|
||||
int x1=0, int y1=0) {
|
||||
|
||||
if(x1 == 0)
|
||||
x1 = nx_;
|
||||
|
||||
if(y1 == 0)
|
||||
y1 = ny_;
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
const unsigned int gc_x = 1;
|
||||
@@ -112,9 +122,9 @@ __global__ void FORCEKernel(
|
||||
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
||||
|
||||
//Read into shared memory
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
__syncthreads();
|
||||
|
||||
//Compute flux along x, and evolve
|
||||
@@ -130,9 +140,9 @@ __global__ void FORCEKernel(
|
||||
__syncthreads();
|
||||
|
||||
//Write to main memory
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
|
||||
@@ -116,7 +116,17 @@ __global__ void HLLKernel(
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
float* cfl_,
|
||||
|
||||
//Subarea of internal domain to compute
|
||||
int x0=0, int y0=0,
|
||||
int x1=0, int y1=0) {
|
||||
|
||||
if(x1 == 0)
|
||||
x1 = nx_;
|
||||
|
||||
if(y1 == 0)
|
||||
y1 = ny_;
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
@@ -129,9 +139,9 @@ __global__ void HLLKernel(
|
||||
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
||||
|
||||
//Read into shared memory
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
|
||||
//Compute F flux
|
||||
computeFluxF(Q, F, g_);
|
||||
@@ -148,9 +158,9 @@ __global__ void HLLKernel(
|
||||
__syncthreads();
|
||||
|
||||
// Write to main memory for all internal cells
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
|
||||
@@ -144,7 +144,17 @@ __global__ void HLL2Kernel(
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
float* cfl_,
|
||||
|
||||
//Subarea of internal domain to compute
|
||||
int x0=0, int y0=0,
|
||||
int x1=0, int y1=0) {
|
||||
|
||||
if(x1 == 0)
|
||||
x1 = nx_;
|
||||
|
||||
if(y1 == 0)
|
||||
y1 = ny_;
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
@@ -158,9 +168,9 @@ __global__ void HLL2Kernel(
|
||||
__shared__ float F[3][h+4][w+4];
|
||||
|
||||
//Read into shared memory
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
|
||||
//Step 0 => evolve x first, then y
|
||||
if (step_ == 0) {
|
||||
@@ -203,9 +213,9 @@ __global__ void HLL2Kernel(
|
||||
|
||||
|
||||
// Write to main memory for all internal cells
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
|
||||
@@ -154,7 +154,18 @@ __global__ void KP07Kernel(
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
float* cfl_,
|
||||
|
||||
//Subarea of internal domain to compute
|
||||
int x0=0, int y0=0,
|
||||
int x1=0, int y1=0) {
|
||||
|
||||
if(x1 == 0)
|
||||
x1 = nx_;
|
||||
|
||||
if(y1 == 0)
|
||||
y1 = ny_;
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
const unsigned int gc_x = 2;
|
||||
@@ -179,9 +190,9 @@ __global__ void KP07Kernel(
|
||||
|
||||
|
||||
//Read into shared memory
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
|
||||
|
||||
//Reconstruct slopes along x and axis
|
||||
|
||||
@@ -141,7 +141,18 @@ __global__ void KP07DimsplitKernel(
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
float* cfl_,
|
||||
|
||||
//Subarea of internal domain to compute
|
||||
int x0=0, int y0=0,
|
||||
int x1=0, int y1=0) {
|
||||
|
||||
if(x1 == 0)
|
||||
x1 = nx_;
|
||||
|
||||
if(y1 == 0)
|
||||
y1 = ny_;
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
const unsigned int gc_x = 2;
|
||||
@@ -154,9 +165,9 @@ __global__ void KP07DimsplitKernel(
|
||||
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
||||
|
||||
//Read into shared memory
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
|
||||
if (step_ == 0) {
|
||||
//Along X
|
||||
@@ -194,9 +205,9 @@ __global__ void KP07DimsplitKernel(
|
||||
}
|
||||
|
||||
// Write to main memory for all internal cells
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
|
||||
@@ -116,8 +116,18 @@ void LxFKernel(
|
||||
float* hu1_ptr_, int hu1_pitch_,
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Output CFL
|
||||
float* cfl_) {
|
||||
//Output CFL
|
||||
float* cfl_,
|
||||
|
||||
//Subarea of internal domain to compute
|
||||
int x0=0, int y0=0,
|
||||
int x1=0, int y1=0) {
|
||||
|
||||
if(x1 == 0)
|
||||
x1 = nx_;
|
||||
|
||||
if(y1 == 0)
|
||||
y1 = ny_;
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
@@ -130,9 +140,9 @@ void LxFKernel(
|
||||
__shared__ float G[vars][h+1][w ];
|
||||
|
||||
//Read from global memory
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
|
||||
//Compute fluxes along the x and y axis
|
||||
computeFluxF<w, h>(Q, F, g_, dx_, dt_);
|
||||
@@ -154,9 +164,9 @@ void LxFKernel(
|
||||
__syncthreads();
|
||||
|
||||
//Write to main memory
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
|
||||
//Compute the CFL for this block
|
||||
if (cfl_ != NULL) {
|
||||
|
||||
@@ -116,7 +116,17 @@ __global__ void WAFKernel(
|
||||
//Output h^{n+1}
|
||||
float* h1_ptr_, int h1_pitch_,
|
||||
float* hu1_ptr_, int hu1_pitch_,
|
||||
float* hv1_ptr_, int hv1_pitch_) {
|
||||
float* hv1_ptr_, int hv1_pitch_,
|
||||
|
||||
//Subarea of internal domain to compute
|
||||
int x0=0, int y0=0,
|
||||
int x1=0, int y1=0) {
|
||||
|
||||
if(x1 == 0)
|
||||
x1 = nx_;
|
||||
|
||||
if(y1 == 0)
|
||||
y1 = ny_;
|
||||
|
||||
const unsigned int w = BLOCK_WIDTH;
|
||||
const unsigned int h = BLOCK_HEIGHT;
|
||||
@@ -131,9 +141,9 @@ __global__ void WAFKernel(
|
||||
|
||||
|
||||
//Read into shared memory Q from global memory
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||
__syncthreads();
|
||||
|
||||
|
||||
@@ -170,9 +180,9 @@ __global__ void WAFKernel(
|
||||
|
||||
|
||||
// Write to main memory for all internal cells
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||
}
|
||||
|
||||
} // extern "C"
|
||||
@@ -477,10 +477,11 @@ __device__ float3 FORCE_1D_flux(const float3 Q_l, const float3 Q_r, const float
|
||||
|
||||
|
||||
|
||||
|
||||
template<int w, int h, int gc_x, int gc_y, int vars>
|
||||
__device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x],
|
||||
float shmem[h+2*gc_y][w+2*gc_x],
|
||||
// TODO give better names for `Q_w` and `Q_h` in the template
|
||||
// as it probably does not reflect well on the name
|
||||
template<int Q_w, int Q_h, int gc_x, int gc_y, int vars>
|
||||
__device__ void writeCfl(float Q[vars][Q_h+2*gc_y][Q_w+2*gc_x],
|
||||
float shmem[Q_h+2*gc_y][Q_w+2*gc_x],
|
||||
const int nx_, const int ny_,
|
||||
const float dx_, const float dy_, const float g_,
|
||||
float* output_) {
|
||||
@@ -509,7 +510,7 @@ __device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x],
|
||||
if (ti < nx_+gc_x && tj < ny_+gc_y) {
|
||||
if (ty == gc_y) {
|
||||
float min_val = shmem[ty][tx];
|
||||
const int max_y = min(h, ny_+gc_y - tj);
|
||||
const int max_y = min(Q_h, ny_+gc_y - tj);
|
||||
for (int j=gc_y; j<max_y+gc_y; j++) {
|
||||
min_val = fminf(min_val, shmem[j][tx]);
|
||||
}
|
||||
@@ -521,7 +522,7 @@ __device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x],
|
||||
//One thread loops over first row to find global max
|
||||
if (tx == gc_x && ty == gc_y) {
|
||||
float min_val = shmem[ty][tx];
|
||||
const int max_x = min(w, nx_+gc_x - ti);
|
||||
const int max_x = min(Q_w, nx_+gc_x - ti);
|
||||
for (int i=gc_x; i<max_x+gc_x; ++i) {
|
||||
min_val = fminf(min_val, shmem[ty][i]);
|
||||
}
|
||||
|
||||
@@ -322,8 +322,8 @@ inline __device__ void readBlock(float* ptr_, int pitch_,
|
||||
float Q[h+2*gc_y][w+2*gc_x],
|
||||
const int nx_, const int ny_,
|
||||
const int boundary_conditions_,
|
||||
int x0, int y0,
|
||||
int x1, int y1) {
|
||||
int x0, int y0,
|
||||
int x1, int y1) {
|
||||
//Index of block within domain
|
||||
const int bx = blockDim.x * blockIdx.x;
|
||||
const int by = blockDim.y * blockIdx.y;
|
||||
|
||||
Reference in New Issue
Block a user