Make no_extern_c=True the default in get_prepared_kernel and wrap kernels in explicit extern "C" blocks

This commit is contained in:
André R. Brodtkorb 2018-08-13 16:10:25 +02:00
parent 8bda93e565
commit e48a408a7c
9 changed files with 38 additions and 49 deletions

View File

@ -182,7 +182,7 @@ class CudaContext(object):
""" """
def get_prepared_kernel(self, kernel_filename, kernel_function_name, \ def get_prepared_kernel(self, kernel_filename, kernel_function_name, \
prepared_call_args, \ prepared_call_args, \
include_dirs=[], verbose=False, no_extern_c=False, include_dirs=[], verbose=False, no_extern_c=True,
**kwargs): **kwargs):
""" """
Helper function to print compilation output Helper function to print compilation output

View File

@ -98,6 +98,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
} }
extern "C" {
__global__ void FORCEKernel( __global__ void FORCEKernel(
int nx_, int ny_, int nx_, int ny_,
float dx_, float dy_, float dt_, float dx_, float dy_, float dt_,
@ -150,4 +151,6 @@ __global__ void FORCEKernel(
hu1_ptr_, hu1_pitch_, hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_, hv1_ptr_, hv1_pitch_,
Q, nx_, ny_); Q, nx_, ny_);
} }
} // extern "C"

View File

@ -136,7 +136,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
extern "C" {
__global__ void HLL2Kernel( __global__ void HLL2Kernel(
int nx_, int ny_, int nx_, int ny_,
float dx_, float dy_, float dt_, float dx_, float dy_, float dt_,
@ -228,4 +228,6 @@ __global__ void HLL2Kernel(
hu1_ptr_, hu1_pitch_, hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_, hv1_ptr_, hv1_pitch_,
Q, nx_, ny_); Q, nx_, ny_);
} }
} // extern "C"

View File

@ -104,7 +104,8 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
extern "C" {
__global__ void HLLKernel( __global__ void HLLKernel(
int nx_, int ny_, int nx_, int ny_,
float dx_, float dy_, float dt_, float dx_, float dy_, float dt_,
@ -119,9 +120,13 @@ __global__ void HLLKernel(
float* h1_ptr_, int h1_pitch_, float* h1_ptr_, int h1_pitch_,
float* hu1_ptr_, int hu1_pitch_, float* hu1_ptr_, int hu1_pitch_,
float* hv1_ptr_, int hv1_pitch_) { float* hv1_ptr_, int hv1_pitch_) {
const int block_width = BLOCK_WIDTH;
const int block_height = BLOCK_HEIGHT;
//Shared memory variables //Shared memory variables
__shared__ float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2]; __shared__ float Q[3][block_height+2][block_width+2];
__shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1]; __shared__ float F[3][block_height+1][block_width+1];
//Read into shared memory //Read into shared memory
@ -160,4 +165,6 @@ __global__ void HLLKernel(
hu1_ptr_, hu1_pitch_, hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_, hv1_ptr_, hv1_pitch_,
Q, nx_, ny_); Q, nx_, ny_);
} }
} // extern "C"

View File

@ -127,6 +127,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
/** /**
* This unsplit kernel computes the 2D numerical scheme with a TVD RK2 time integration scheme * This unsplit kernel computes the 2D numerical scheme with a TVD RK2 time integration scheme
*/ */
extern "C" {
__global__ void KP07DimsplitKernel( __global__ void KP07DimsplitKernel(
int nx_, int ny_, int nx_, int ny_,
float dx_, float dy_, float dt_, float dx_, float dy_, float dt_,
@ -219,4 +220,6 @@ __global__ void KP07DimsplitKernel(
hu1_ptr_, hu1_pitch_, hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_, hv1_ptr_, hv1_pitch_,
Q, nx_, ny_); Q, nx_, ny_);
} }
} // extern "C"

View File

@ -106,8 +106,6 @@ __global__ void KP07Kernel(
float theta_, float theta_,
float r_, //< Bottom friction coefficient
int step_, int step_,
//Input h^n //Input h^n
@ -180,15 +178,13 @@ __global__ void KP07Kernel(
float* const h_row = (float*) ((char*) h1_ptr_ + h1_pitch_*tj); float* const h_row = (float*) ((char*) h1_ptr_ + h1_pitch_*tj);
float* const hu_row = (float*) ((char*) hu1_ptr_ + hu1_pitch_*tj); float* const hu_row = (float*) ((char*) hu1_ptr_ + hu1_pitch_*tj);
float* const hv_row = (float*) ((char*) hv1_ptr_ + hv1_pitch_*tj); float* const hv_row = (float*) ((char*) hv1_ptr_ + hv1_pitch_*tj);
const float C = 2.0f*r_*dt_/Q[0][j][i];
if (step_ == 0) { if (step_ == 0) {
//First step of RK2 ODE integrator //First step of RK2 ODE integrator
h_row[ti] = h1; h_row[ti] = h1;
hu_row[ti] = hu1 / (1.0f + C); hu_row[ti] = hu1;
hv_row[ti] = hv1 / (1.0f + C); hv_row[ti] = hv1;
} }
else if (step_ == 1) { else if (step_ == 1) {
//Second step of RK2 ODE integrator //Second step of RK2 ODE integrator
@ -205,8 +201,8 @@ __global__ void KP07Kernel(
//Write to main memory //Write to main memory
h_row[ti] = h_b; h_row[ti] = h_b;
hu_row[ti] = hu_b / (1.0f + 0.5f*C); hu_row[ti] = hu_b;
hv_row[ti] = hv_b / (1.0f + 0.5f*C); hv_row[ti] = hv_b;
} }
} }
} }

View File

@ -66,7 +66,6 @@ class LxF (Simulator.BaseSimulator):
# Get kernels # Get kernels
self.kernel = context.get_prepared_kernel("LxF_kernel.cu", "LxFKernel", \ self.kernel = context.get_prepared_kernel("LxF_kernel.cu", "LxFKernel", \
"iiffffPiPiPiPiPiPi", \ "iiffffPiPiPiPiPiPi", \
no_extern_c=True, \
BLOCK_WIDTH=block_width, \ BLOCK_WIDTH=block_width, \
BLOCK_HEIGHT=block_height) BLOCK_HEIGHT=block_height)

View File

@ -97,9 +97,9 @@ void computeFluxG(float Q[3][block_height+2][block_width+2],
} }
template <int block_width, int block_height> extern "C" {
__device__ __global__
void LxFKernelHelper( void LxFKernel(
int nx_, int ny_, int nx_, int ny_,
float dx_, float dy_, float dt_, float dx_, float dy_, float dt_,
float g_, float g_,
@ -114,6 +114,9 @@ void LxFKernelHelper(
float* hu1_ptr_, int hu1_pitch_, float* hu1_ptr_, int hu1_pitch_,
float* hv1_ptr_, int hv1_pitch_) { float* hv1_ptr_, int hv1_pitch_) {
const int block_width = BLOCK_WIDTH;
const int block_height = BLOCK_HEIGHT;
//Index of cell within domain //Index of cell within domain
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1 const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
const int tj = get_global_id(1) + 1; const int tj = get_global_id(1) + 1;
@ -166,32 +169,5 @@ void LxFKernelHelper(
} }
} }
extern "C" {
__global__
void LxFKernel(
int nx_, int ny_,
float dx_, float dy_, float dt_,
float g_,
//Input h^n
float* h0_ptr_, int h0_pitch_,
float* hu0_ptr_, int hu0_pitch_,
float* hv0_ptr_, int hv0_pitch_,
//Output h^{n+1}
float* h1_ptr_, int h1_pitch_,
float* hu1_ptr_, int hu1_pitch_,
float* hv1_ptr_, int hv1_pitch_) {
LxFKernelHelper<BLOCK_WIDTH, BLOCK_HEIGHT>(
nx_, ny_,
dx_, dy_, dt_,
g_,
h0_ptr_, h0_pitch_,
hu0_ptr_, hu0_pitch_,
hv0_ptr_, hv0_pitch_,
h1_ptr_, h1_pitch_,
hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_);
}
} // extern "C" } // extern "C"

View File

@ -115,6 +115,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
extern "C" {
__global__ void WAFKernel( __global__ void WAFKernel(
int nx_, int ny_, int nx_, int ny_,
float dx_, float dy_, float dt_, float dx_, float dy_, float dt_,
@ -193,4 +194,6 @@ __global__ void WAFKernel(
hu1_ptr_, hu1_pitch_, hu1_ptr_, hu1_pitch_,
hv1_ptr_, hv1_pitch_, hv1_ptr_, hv1_pitch_,
Q, nx_, ny_); Q, nx_, ny_);
} }
} // extern "C"