Make no_extern_c=True the default in get_prepared_kernel; wrap kernels in explicit extern "C" blocks

This commit is contained in:
André R. Brodtkorb 2018-08-13 16:10:25 +02:00
parent 8bda93e565
commit e48a408a7c
9 changed files with 38 additions and 49 deletions

View File

@ -182,7 +182,7 @@ class CudaContext(object):
"""
def get_prepared_kernel(self, kernel_filename, kernel_function_name, \
prepared_call_args, \
include_dirs=[], verbose=False, no_extern_c=False,
include_dirs=[], verbose=False, no_extern_c=True,
**kwargs):
"""
Helper function to print compilation output

View File

@ -98,6 +98,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
}
extern "C" {
__global__ void FORCEKernel(
int nx_, int ny_,
float dx_, float dy_, float dt_,
@ -150,4 +151,6 @@ __global__ void FORCEKernel(
hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_,
Q, nx_, ny_);
}
}
} // extern "C"

View File

@ -136,7 +136,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
extern "C" {
__global__ void HLL2Kernel(
int nx_, int ny_,
float dx_, float dy_, float dt_,
@ -228,4 +228,6 @@ __global__ void HLL2Kernel(
hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_,
Q, nx_, ny_);
}
}
} // extern "C"

View File

@ -104,7 +104,8 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
extern "C" {
__global__ void HLLKernel(
int nx_, int ny_,
float dx_, float dy_, float dt_,
@ -119,9 +120,13 @@ __global__ void HLLKernel(
float* h1_ptr_, int h1_pitch_,
float* hu1_ptr_, int hu1_pitch_,
float* hv1_ptr_, int hv1_pitch_) {
const int block_width = BLOCK_WIDTH;
const int block_height = BLOCK_HEIGHT;
//Shared memory variables
__shared__ float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
__shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
__shared__ float Q[3][block_height+2][block_width+2];
__shared__ float F[3][block_height+1][block_width+1];
//Read into shared memory
@ -160,4 +165,6 @@ __global__ void HLLKernel(
hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_,
Q, nx_, ny_);
}
}
} // extern "C"

View File

@ -127,6 +127,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
/**
* This unsplit kernel computes the 2D numerical scheme with a TVD RK2 time integration scheme
*/
extern "C" {
__global__ void KP07DimsplitKernel(
int nx_, int ny_,
float dx_, float dy_, float dt_,
@ -219,4 +220,6 @@ __global__ void KP07DimsplitKernel(
hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_,
Q, nx_, ny_);
}
}
} // extern "C"

View File

@ -106,8 +106,6 @@ __global__ void KP07Kernel(
float theta_,
float r_, //< Bottom friction coefficient
int step_,
//Input h^n
@ -180,15 +178,13 @@ __global__ void KP07Kernel(
float* const h_row = (float*) ((char*) h1_ptr_ + h1_pitch_*tj);
float* const hu_row = (float*) ((char*) hu1_ptr_ + hu1_pitch_*tj);
float* const hv_row = (float*) ((char*) hv1_ptr_ + hv1_pitch_*tj);
const float C = 2.0f*r_*dt_/Q[0][j][i];
if (step_ == 0) {
//First step of RK2 ODE integrator
h_row[ti] = h1;
hu_row[ti] = hu1 / (1.0f + C);
hv_row[ti] = hv1 / (1.0f + C);
hu_row[ti] = hu1;
hv_row[ti] = hv1;
}
else if (step_ == 1) {
//Second step of RK2 ODE integrator
@ -205,8 +201,8 @@ __global__ void KP07Kernel(
//Write to main memory
h_row[ti] = h_b;
hu_row[ti] = hu_b / (1.0f + 0.5f*C);
hv_row[ti] = hv_b / (1.0f + 0.5f*C);
hu_row[ti] = hu_b;
hv_row[ti] = hv_b;
}
}
}

View File

@ -66,7 +66,6 @@ class LxF (Simulator.BaseSimulator):
# Get kernels
self.kernel = context.get_prepared_kernel("LxF_kernel.cu", "LxFKernel", \
"iiffffPiPiPiPiPiPi", \
no_extern_c=True, \
BLOCK_WIDTH=block_width, \
BLOCK_HEIGHT=block_height)

View File

@ -97,9 +97,9 @@ void computeFluxG(float Q[3][block_height+2][block_width+2],
}
template <int block_width, int block_height>
__device__
void LxFKernelHelper(
extern "C" {
__global__
void LxFKernel(
int nx_, int ny_,
float dx_, float dy_, float dt_,
float g_,
@ -114,6 +114,9 @@ void LxFKernelHelper(
float* hu1_ptr_, int hu1_pitch_,
float* hv1_ptr_, int hv1_pitch_) {
const int block_width = BLOCK_WIDTH;
const int block_height = BLOCK_HEIGHT;
//Index of cell within domain
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
const int tj = get_global_id(1) + 1;
@ -166,32 +169,5 @@ void LxFKernelHelper(
}
}
extern "C" {
__global__
void LxFKernel(
int nx_, int ny_,
float dx_, float dy_, float dt_,
float g_,
//Input h^n
float* h0_ptr_, int h0_pitch_,
float* hu0_ptr_, int hu0_pitch_,
float* hv0_ptr_, int hv0_pitch_,
//Output h^{n+1}
float* h1_ptr_, int h1_pitch_,
float* hu1_ptr_, int hu1_pitch_,
float* hv1_ptr_, int hv1_pitch_) {
LxFKernelHelper<BLOCK_WIDTH, BLOCK_HEIGHT>(
nx_, ny_,
dx_, dy_, dt_,
g_,
h0_ptr_, h0_pitch_,
hu0_ptr_, hu0_pitch_,
hv0_ptr_, hv0_pitch_,
h1_ptr_, h1_pitch_,
hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_);
}
} // extern "C"

View File

@ -115,6 +115,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
extern "C" {
__global__ void WAFKernel(
int nx_, int ny_,
float dx_, float dy_, float dt_,
@ -193,4 +194,6 @@ __global__ void WAFKernel(
hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_,
Q, nx_, ny_);
}
}
} // extern "C"