diff --git a/SWESimulators/Common.py b/SWESimulators/Common.py index 0729b2c..b8ba23a 100644 --- a/SWESimulators/Common.py +++ b/SWESimulators/Common.py @@ -182,7 +182,7 @@ class CudaContext(object): """ def get_prepared_kernel(self, kernel_filename, kernel_function_name, \ prepared_call_args, \ - include_dirs=[], verbose=False, no_extern_c=False, + include_dirs=[], verbose=False, no_extern_c=True, **kwargs): """ Helper function to print compilation output diff --git a/SWESimulators/FORCE_kernel.cu b/SWESimulators/FORCE_kernel.cu index 7fac9ba..c43fd38 100644 --- a/SWESimulators/FORCE_kernel.cu +++ b/SWESimulators/FORCE_kernel.cu @@ -98,6 +98,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], } +extern "C" { __global__ void FORCEKernel( int nx_, int ny_, float dx_, float dy_, float dt_, @@ -150,4 +151,6 @@ __global__ void FORCEKernel( hu1_ptr_, hu1_pitch_, hv1_ptr_, hv1_pitch_, Q, nx_, ny_); -} \ No newline at end of file +} + +} // extern "C" \ No newline at end of file diff --git a/SWESimulators/HLL2_kernel.cu b/SWESimulators/HLL2_kernel.cu index 4b8ce58..168627f 100644 --- a/SWESimulators/HLL2_kernel.cu +++ b/SWESimulators/HLL2_kernel.cu @@ -136,7 +136,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], - +extern "C" { __global__ void HLL2Kernel( int nx_, int ny_, float dx_, float dy_, float dt_, @@ -228,4 +228,6 @@ __global__ void HLL2Kernel( hu1_ptr_, hu1_pitch_, hv1_ptr_, hv1_pitch_, Q, nx_, ny_); -} \ No newline at end of file +} + +} // extern "C" \ No newline at end of file diff --git a/SWESimulators/HLL_kernel.cu b/SWESimulators/HLL_kernel.cu index 0ae8feb..487782f 100644 --- a/SWESimulators/HLL_kernel.cu +++ b/SWESimulators/HLL_kernel.cu @@ -104,7 +104,8 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], - +extern "C" { + __global__ void HLLKernel( int nx_, int ny_, float dx_, float dy_, float dt_, @@ -119,9 +120,13 @@ __global__ void HLLKernel( float* h1_ptr_, int h1_pitch_, float* hu1_ptr_, int hu1_pitch_, float* hv1_ptr_, int hv1_pitch_) { + + const int block_width = BLOCK_WIDTH; + const int block_height = BLOCK_HEIGHT; + //Shared memory variables - __shared__ float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2]; - __shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1]; + __shared__ float Q[3][block_height+2][block_width+2]; + __shared__ float F[3][block_height+1][block_width+1]; //Read into shared memory @@ -160,4 +165,6 @@ __global__ void HLLKernel( hu1_ptr_, hu1_pitch_, hv1_ptr_, hv1_pitch_, Q, nx_, ny_); -} \ No newline at end of file +} + +} // extern "C" \ No newline at end of file diff --git a/SWESimulators/KP07_dimsplit_kernel.cu b/SWESimulators/KP07_dimsplit_kernel.cu index b37fa65..9a523aa 100644 --- a/SWESimulators/KP07_dimsplit_kernel.cu +++ b/SWESimulators/KP07_dimsplit_kernel.cu @@ -127,6 +127,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], /** * This unsplit kernel computes the 2D numerical scheme with a TVD RK2 time integration scheme */ +extern "C" { __global__ void KP07DimsplitKernel( int nx_, int ny_, float dx_, float dy_, float dt_, @@ -219,4 +220,6 @@ __global__ void KP07DimsplitKernel( hu1_ptr_, hu1_pitch_, hv1_ptr_, hv1_pitch_, Q, nx_, ny_); -} \ No newline at end of file +} + +} // extern "C" \ No newline at end of file diff --git a/SWESimulators/KP07_kernel.cu b/SWESimulators/KP07_kernel.cu index 7597f1a..11703b8 100644 --- a/SWESimulators/KP07_kernel.cu +++ b/SWESimulators/KP07_kernel.cu @@ -106,8 +106,6 @@ __global__ void KP07Kernel( float theta_, - float r_, //< Bottom friction coefficient - int step_, //Input h^n @@ -180,15 +178,13 @@ __global__ void KP07Kernel( float* const h_row = (float*) ((char*) h1_ptr_ + h1_pitch_*tj); float* const hu_row = (float*) ((char*) hu1_ptr_ + hu1_pitch_*tj); float* const hv_row = (float*) ((char*) hv1_ptr_ + hv1_pitch_*tj); - - const float C = 2.0f*r_*dt_/Q[0][j][i]; if (step_ == 0) { //First step of RK2 ODE integrator h_row[ti] = h1; - hu_row[ti] = hu1 / (1.0f + C); - hv_row[ti] = hv1 / (1.0f + C); + hu_row[ti] = hu1; + hv_row[ti] = hv1; } else if (step_ == 1) { //Second step of RK2 ODE integrator @@ -205,8 +201,8 @@ __global__ void KP07Kernel( //Write to main memory h_row[ti] = h_b; - hu_row[ti] = hu_b / (1.0f + 0.5f*C); - hv_row[ti] = hv_b / (1.0f + 0.5f*C); + hu_row[ti] = hu_b; + hv_row[ti] = hv_b; } } } \ No newline at end of file diff --git a/SWESimulators/LxF.py b/SWESimulators/LxF.py index 2a8b6ea..6cd6b54 100644 --- a/SWESimulators/LxF.py +++ b/SWESimulators/LxF.py @@ -66,7 +66,6 @@ class LxF (Simulator.BaseSimulator): # Get kernels self.kernel = context.get_prepared_kernel("LxF_kernel.cu", "LxFKernel", \ "iiffffPiPiPiPiPiPi", \ - no_extern_c=True, \ BLOCK_WIDTH=block_width, \ BLOCK_HEIGHT=block_height) diff --git a/SWESimulators/LxF_kernel.cu b/SWESimulators/LxF_kernel.cu index 237e3b0..052ef17 100644 --- a/SWESimulators/LxF_kernel.cu +++ b/SWESimulators/LxF_kernel.cu @@ -97,9 +97,9 @@ void computeFluxG(float Q[3][block_height+2][block_width+2], } -template -__device__ -void LxFKernelHelper( +extern "C" { +__global__ +void LxFKernel( int nx_, int ny_, float dx_, float dy_, float dt_, float g_, @@ -114,6 +114,9 @@ void LxFKernelHelper( float* hu1_ptr_, int hu1_pitch_, float* hv1_ptr_, int hv1_pitch_) { + const int block_width = BLOCK_WIDTH; + const int block_height = BLOCK_HEIGHT; + //Index of cell within domain const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1 const int tj = get_global_id(1) + 1; @@ -166,32 +169,5 @@ void LxFKernelHelper( } } -extern "C" { -__global__ -void LxFKernel( - int nx_, int ny_, - float dx_, float dy_, float dt_, - float g_, - - //Input h^n - float* h0_ptr_, int h0_pitch_, - float* hu0_ptr_, int hu0_pitch_, - float* hv0_ptr_, int hv0_pitch_, - - //Output h^{n+1} - float* h1_ptr_, int h1_pitch_, - float* hu1_ptr_, int hu1_pitch_, - float* hv1_ptr_, int hv1_pitch_) { - LxFKernelHelper( - nx_, ny_, - dx_, dy_, dt_, - g_, - h0_ptr_, h0_pitch_, - hu0_ptr_, hu0_pitch_, - hv0_ptr_, hv0_pitch_, - h1_ptr_, h1_pitch_, - hu1_ptr_, hu1_pitch_, - hv1_ptr_, hv1_pitch_); -} } // extern "C" diff --git a/SWESimulators/WAF_kernel.cu b/SWESimulators/WAF_kernel.cu index 764d57c..f707aa8 100644 --- a/SWESimulators/WAF_kernel.cu +++ b/SWESimulators/WAF_kernel.cu @@ -115,6 +115,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], +extern "C" { __global__ void WAFKernel( int nx_, int ny_, float dx_, float dy_, float dt_, @@ -193,4 +194,6 @@ __global__ void WAFKernel( hu1_ptr_, hu1_pitch_, hv1_ptr_, hv1_pitch_, Q, nx_, ny_); -} \ No newline at end of file +} + +} // extern "C" \ No newline at end of file