Made no_extern_c=True the default in get_prepared_kernel

2025-05-18 06:24:13 +02:00 · 2018-08-13 16:10:25 +02:00 · 2018-08-13 16:10:25 +02:00 · e48a408a7c
commit e48a408a7c
parent 8bda93e565
9 changed files with 38 additions and 49 deletions
--- a/SWESimulators/Common.py
+++ b/SWESimulators/Common.py
@@ -182,7 +182,7 @@ class CudaContext(object):
    """
    def get_prepared_kernel(self, kernel_filename, kernel_function_name, \
                    prepared_call_args, \
-                    include_dirs=[], verbose=False, no_extern_c=False, 
+                    include_dirs=[], verbose=False, no_extern_c=True, 
                    **kwargs):
        """
        Helper function to print compilation output
--- a/SWESimulators/FORCE_kernel.cu
+++ b/SWESimulators/FORCE_kernel.cu
@@ -98,6 +98,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
 }


+extern "C" {
 __global__ void FORCEKernel(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
@@ -150,4 +151,6 @@ __global__ void FORCEKernel(
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
-}
+}
+
+} // extern "C"
--- a/SWESimulators/HLL2_kernel.cu
+++ b/SWESimulators/HLL2_kernel.cu
@@ -136,7 +136,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],



-
+extern "C" {
 __global__ void HLL2Kernel(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
@@ -228,4 +228,6 @@ __global__ void HLL2Kernel(
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
-}
+}
+
+} // extern "C"
--- a/SWESimulators/HLL_kernel.cu
+++ b/SWESimulators/HLL_kernel.cu
@@ -104,7 +104,8 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],



-
+extern "C" {
+    
 __global__ void HLLKernel(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
@@ -119,9 +120,13 @@ __global__ void HLLKernel(
        float* h1_ptr_, int h1_pitch_,
        float* hu1_ptr_, int hu1_pitch_,
        float* hv1_ptr_, int hv1_pitch_) {
+            
+    const int block_width = BLOCK_WIDTH;
+    const int block_height = BLOCK_HEIGHT;
+    
    //Shared memory variables
-    __shared__ float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
-    __shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
+    __shared__ float Q[3][block_height+2][block_width+2];
+    __shared__ float F[3][block_height+1][block_width+1];
    
    
    //Read into shared memory
@@ -160,4 +165,6 @@ __global__ void HLLKernel(
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
-}
+}
+
+} // extern "C"
--- a/SWESimulators/KP07_dimsplit_kernel.cu
+++ b/SWESimulators/KP07_dimsplit_kernel.cu
@@ -127,6 +127,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
 /**
  * This unsplit kernel computes the 2D numerical scheme with a TVD RK2 time integration scheme
  */
+extern "C" {
 __global__ void KP07DimsplitKernel(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
@@ -219,4 +220,6 @@ __global__ void KP07DimsplitKernel(
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
-}
+}
+
+} // extern "C"
--- a/SWESimulators/KP07_kernel.cu
+++ b/SWESimulators/KP07_kernel.cu
@@ -106,8 +106,6 @@ __global__ void KP07Kernel(
        
        float theta_,
        
-        float r_, //< Bottom friction coefficient
-        
        int step_,
        
        //Input h^n
@@ -180,15 +178,13 @@ __global__ void KP07Kernel(
        float* const h_row  = (float*) ((char*) h1_ptr_ + h1_pitch_*tj);
        float* const hu_row = (float*) ((char*) hu1_ptr_ + hu1_pitch_*tj);
        float* const hv_row = (float*) ((char*) hv1_ptr_ + hv1_pitch_*tj);
-        
-        const float C = 2.0f*r_*dt_/Q[0][j][i];
                    
        if  (step_ == 0) {
            //First step of RK2 ODE integrator
            
            h_row[ti] = h1;
-            hu_row[ti] = hu1 / (1.0f + C);
-            hv_row[ti] = hv1 / (1.0f + C);
+            hu_row[ti] = hu1;
+            hv_row[ti] = hv1;
        }
        else if (step_ == 1) {
            //Second step of RK2 ODE integrator
@@ -205,8 +201,8 @@ __global__ void KP07Kernel(
            
            //Write to main memory
            h_row[ti] = h_b;
-            hu_row[ti] = hu_b / (1.0f + 0.5f*C);
-            hv_row[ti] = hv_b / (1.0f + 0.5f*C);
+            hu_row[ti] = hu_b;
+            hv_row[ti] = hv_b;
        }
    }
 }
--- a/SWESimulators/LxF.py
+++ b/SWESimulators/LxF.py
@@ -66,7 +66,6 @@ class LxF (Simulator.BaseSimulator):
        # Get kernels
        self.kernel = context.get_prepared_kernel("LxF_kernel.cu", "LxFKernel", \
                                        "iiffffPiPiPiPiPiPi", \
-                                        no_extern_c=True, \
                                        BLOCK_WIDTH=block_width, \
                                        BLOCK_HEIGHT=block_height)
        
--- a/SWESimulators/LxF_kernel.cu
+++ b/SWESimulators/LxF_kernel.cu
@@ -97,9 +97,9 @@ void computeFluxG(float Q[3][block_height+2][block_width+2],
 }


-template <int block_width, int block_height>
-__device__ 
-void LxFKernelHelper(
+extern "C" {
+__global__ 
+void LxFKernel(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,
@@ -114,6 +114,9 @@ void LxFKernelHelper(
        float* hu1_ptr_, int hu1_pitch_,
        float* hv1_ptr_, int hv1_pitch_) {
            
+    const int block_width = BLOCK_WIDTH;
+    const int block_height = BLOCK_HEIGHT;
+            
    //Index of cell within domain
    const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
    const int tj = get_global_id(1) + 1;
@@ -166,32 +169,5 @@ void LxFKernelHelper(
    }
 }

-extern "C" {
-__global__ 
-void LxFKernel(
-        int nx_, int ny_,
-        float dx_, float dy_, float dt_,
-        float g_,
-        
-        //Input h^n
-        float* h0_ptr_, int h0_pitch_,
-        float* hu0_ptr_, int hu0_pitch_,
-        float* hv0_ptr_, int hv0_pitch_,
-        
-        //Output h^{n+1}
-        float* h1_ptr_, int h1_pitch_,
-        float* hu1_ptr_, int hu1_pitch_,
-        float* hv1_ptr_, int hv1_pitch_) {
-    LxFKernelHelper<BLOCK_WIDTH, BLOCK_HEIGHT>(
-        nx_, ny_,
-        dx_, dy_, dt_,
-        g_,
-        h0_ptr_, h0_pitch_,
-        hu0_ptr_, hu0_pitch_,
-        hv0_ptr_, hv0_pitch_,
-        h1_ptr_, h1_pitch_,
-        hu1_ptr_, hu1_pitch_,
-        hv1_ptr_, hv1_pitch_);
-}
 } // extern "C"

--- a/SWESimulators/WAF_kernel.cu
+++ b/SWESimulators/WAF_kernel.cu
@@ -115,6 +115,7 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],



+extern "C" {
 __global__ void WAFKernel(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
@@ -193,4 +194,6 @@ __global__ void WAFKernel(
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
-}
+}
+
+} // extern "C"