Implemented proper boundary conditions handling

2025-11-16 19:52:56 +01:00 · 2018-11-08 22:07:31 +01:00 · 2018-11-08 22:07:31 +01:00 · bdf7b4292c
commit bdf7b4292c
parent fd337e7d53
5 changed files with 401229 additions and 456 deletions
--- a/EulerTesting.ipynb
+++ b/EulerTesting.ipynb
--- a/GPUSimulators/EE2D_KP07_dimsplit.py
+++ b/GPUSimulators/EE2D_KP07_dimsplit.py
@ -61,7 +61,7 @@ class EE2D_KP07_dimsplit (BaseSimulator):
                 gamma, \
                 theta=1.3, \
                 order=2, \
-                 boundaryConditions=BoundaryCondition(), \
+                 boundary_conditions=BoundaryCondition(), \
                 block_width=16, block_height=8):
                 
        # Call super constructor
@ -73,7 +73,7 @@ class EE2D_KP07_dimsplit (BaseSimulator):
        self.gamma = np.float32(gamma)
        self.theta = np.float32(theta) 
        self.order = np.int32(order)
-        self.boundaryConditions = boundaryConditions.asCodedInt()
+        self.boundary_conditions = boundary_conditions.asCodedInt()

        #Get kernels
        self.kernel = context.get_prepared_kernel("cuda/EE2D_KP07_dimsplit.cu", "KP07DimsplitKernel", \
@ -112,7 +112,7 @@ class EE2D_KP07_dimsplit (BaseSimulator):
                self.gamma, \
                self.theta, \
                Simulator.stepOrderToCodedInt(step=0, order=self.order), \
-                self.boundaryConditions, \
+                self.boundary_conditions, \
                self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
                self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
                self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
@ -132,8 +132,8 @@ class EE2D_KP07_dimsplit (BaseSimulator):
                self.g, \
                self.gamma, \
                self.theta, \
-                Simulator.stepOrderToCodedInt(step=0, order=self.order), \
-                self.boundaryConditions, \
+                Simulator.stepOrderToCodedInt(step=1, order=self.order), \
+                self.boundary_conditions, \
                self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
                self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
                self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
--- a/GPUSimulators/Simulator.py
+++ b/GPUSimulators/Simulator.py
@ -67,6 +67,12 @@ class BoundaryCondition(object):
        self.east = types['east']
        self.west = types['west']
        
+        if (self.north == BoundaryCondition.Type.Neumann \
+                or self.south == BoundaryCondition.Type.Neumann \
+                or self.east == BoundaryCondition.Type.Neumann \
+                or self.west == BoundaryCondition.Type.Neumann):
+            raise(NotImplementedError("Neumann boundary condition not supported"))
+
        
    def asCodedInt(self):
        """
@ -87,6 +93,9 @@ class BoundaryCondition(object):
    
    
    
+    
+    
+    
 class BaseSimulator(object):
   
    def __init__(self, \
--- a/GPUSimulators/cuda/EE2D_KP07_dimsplit.cu
+++ b/GPUSimulators/cuda/EE2D_KP07_dimsplit.cu
@ -156,18 +156,12 @@ __global__ void KP07DimsplitKernel(
    __shared__ float  F[4][h+4][w+4];
    
    //Read into shared memory
-    readBlock<w, h, gc>(  rho0_ptr_,   rho0_pitch_, Q[0], nx_, ny_);
-    readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_);
-    readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_);
-    readBlock<w, h, gc>(    E0_ptr_,     E0_pitch_, Q[3], nx_, ny_);
+    readBlock<w, h, gc,  1,  1>(  rho0_ptr_,   rho0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
+    readBlock<w, h, gc,  1, -1>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
+    readBlock<w, h, gc, -1,  1>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
+    readBlock<w, h, gc,  1,  1>(    E0_ptr_,     E0_pitch_, Q[3], nx_, ny_, boundary_conditions_);
    __syncthreads();

-    //Fix boundary conditions
-    noFlowBoundary<w, h, gc,  1,  1>(Q[0], nx_, ny_);
-    noFlowBoundary<w, h, gc, -1,  1>(Q[1], nx_, ny_);
-    noFlowBoundary<w, h, gc,  1, -1>(Q[2], nx_, ny_);
-    noFlowBoundary<w, h, gc,  1,  1>(Q[3], nx_, ny_);
-

    //Step 0 => evolve x first, then y
    if (getStep(step_order_) == 0) {
@ -185,7 +179,6 @@ __global__ void KP07DimsplitKernel(
        minmodSlopeY<w, h, gc, vars>(Q, Qx, theta_);
        __syncthreads();

-        
        computeFluxG(Q, Qx, F, gamma_, dy_, dt_);
        __syncthreads();

@ -236,24 +229,21 @@ __global__ void KP07DimsplitKernel(
        
        //This is the RK2-part
        if (getOrder(step_order_) == 2) {
-            const int tx = threadIdx.x + gc;
-            const int ty = threadIdx.y + gc;
-            const float q1 = Q[0][ty][tx];
-            const float q2 = Q[1][ty][tx];
-            const float q3 = Q[2][ty][tx];
-            const float q4 = Q[3][ty][tx];
-            __syncthreads();
+            const int i = threadIdx.x + gc;
+            const int j = threadIdx.y + gc;
+            const int tx = blockDim.x*blockIdx.x + i;
+            const int ty = blockDim.y*blockIdx.y + j;
            
-            readBlock<w, h, gc>(  rho1_ptr_,   rho1_pitch_, Q[0], nx_, ny_);
-            readBlock<w, h, gc>(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_);
-            readBlock<w, h, gc>(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_);
-            readBlock<w, h, gc>(    E1_ptr_,     E1_pitch_, Q[3], nx_, ny_);
-            __syncthreads();
+            const float q1 = ((float*) ((char*)   rho1_ptr_ +   rho1_pitch_*ty))[tx];
+            const float q2 = ((float*) ((char*) rho_u1_ptr_ + rho_u1_pitch_*ty))[tx];
+            const float q3 = ((float*) ((char*) rho_v1_ptr_ + rho_v1_pitch_*ty))[tx];
+            const float q4 = ((float*) ((char*)     E1_ptr_ +     E1_pitch_*ty))[tx];
            
-            Q[0][ty][tx] = 0.5f*( Q[0][ty][tx] + q1 );
-            Q[1][ty][tx] = 0.5f*( Q[1][ty][tx] + q2 );
-            Q[2][ty][tx] = 0.5f*( Q[2][ty][tx] + q3 );
-            Q[3][ty][tx] = 0.5f*( Q[3][ty][tx] + q4 );
+            Q[0][j][i] = 0.5f*( Q[0][j][i] + q1 );
+            Q[1][j][i] = 0.5f*( Q[1][j][i] + q2 );
+            Q[2][j][i] = 0.5f*( Q[2][j][i] + q3 );
+            Q[3][j][i] = 0.5f*( Q[3][j][i] + q4 );
+            __syncthreads();
        }        
    }

--- a/GPUSimulators/cuda/common.h
+++ b/GPUSimulators/cuda/common.h
@ -86,45 +86,135 @@ __device__ float desingularize(float x_, float eps_) {



+
+
+
+/**
+  * Returns the step stored in the leftmost 16 bits 
+  * of the 32 bit step-order integer
+  */
+inline __device__ int getStep(int step_order_) {
+    return step_order_ >> 16;
+}
+
+/**
+  * Returns the order stored in the rightmost 16 bits 
+  * of the 32 bit step-order integer
+  */
+inline __device__ int getOrder(int step_order_) {
+    return step_order_ & 0x0000FFFF;
+}
+
+
+enum BoundaryCondition {
+    Dirichlet = 0,
+    Neumann = 1,
+    Periodic = 2,
+    Reflective = 3
+};
+
+inline __device__ BoundaryCondition getBCNorth(int bc_) {
+    return static_cast<BoundaryCondition>(bc_ & 0x000F);
+}
+
+inline __device__ BoundaryCondition getBCSouth(int bc_) {
+    return static_cast<BoundaryCondition>((bc_ >> 8) & 0x000F);
+}
+
+inline __device__ BoundaryCondition getBCEast(int bc_) {
+    return static_cast<BoundaryCondition>((bc_ >> 16) & 0x000F);
+}
+
+inline __device__ BoundaryCondition getBCWest(int bc_) {
+    return static_cast<BoundaryCondition>(bc_ >> 24);
+}
+
+
+
+
+
+
+template<int block_width, int block_height, int ghost_cells>
+inline __device__ int handlePeriodicBoundaryX(int k, int nx_, int boundary_conditions_) {
+    const int gc_pad = 2*ghost_cells;
+    
+    if ((k < gc_pad) 
+            && getBCWest(boundary_conditions_) == Periodic) {
+        k += (nx_+2*ghost_cells - 2*gc_pad);
+    }
+    else if ((k >= nx_+2*ghost_cells-gc_pad) 
+            && getBCEast(boundary_conditions_) == Periodic) {
+        k -= (nx_+2*ghost_cells - 2*gc_pad);
+    }
+    
+    return k;
+}
+
+template<int block_width, int block_height, int ghost_cells>
+inline __device__ int handlePeriodicBoundaryY(int l, int ny_, int boundary_conditions_) {
+    const int gc_pad = 2*ghost_cells;
+    
+    if ((l < gc_pad) 
+            && getBCSouth(boundary_conditions_) == Periodic) {
+        l += (ny_+2*ghost_cells - 2*gc_pad);
+    }
+    else if ((l >= ny_+2*ghost_cells-gc_pad) 
+            && getBCNorth(boundary_conditions_) == Periodic) {
+        l -= (ny_+2*ghost_cells - 2*gc_pad);
+    }
+    
+    return l;
+}
+    
+
 /**
  * Reads a block of data with ghost cells
  */
-template<int block_width, int block_height, int ghost_cells>
+template<int block_width, int block_height, int ghost_cells, int sign_north_south, int sign_east_west>
 inline __device__ void readBlock(float* ptr_, int pitch_,
-                float shmem[block_height+2*ghost_cells][block_width+2*ghost_cells], 
-                const int nx_, const int ny_) {
+                float Q[block_height+2*ghost_cells][block_width+2*ghost_cells], 
+                const int nx_, const int ny_,
+                const int boundary_conditions_) {
    //Index of block within domain
    const int bx = blockDim.x * blockIdx.x;
    const int by = blockDim.y * blockIdx.y;

-    const int gc_pad = 4;
-    
    //Read into shared memory
    //Loop over all variables
    for (int j=threadIdx.y; j<block_height+2*ghost_cells; j+=block_height) {
-        const int l = min(by + j, ny_+2*ghost_cells-1);
-        
-        /*
-        const int y = by + j;
-        const int y_offset = ( (int) (y < gc_pad) - (int) (y >= ny_+2*ghost_cells-gc_pad) ) * (ny_+2*ghost_cells - 2*gc_pad); 
-        const int l = min(y + y_offset, ny_+2*ghost_cells-1);
-        */
-        
+        //Handle periodic boundary conditions here
+        int l = handlePeriodicBoundaryY<block_width, block_height, ghost_cells>(by + j, ny_, boundary_conditions_);
+        l = min(l, ny_+2*ghost_cells-1);
        float* row = (float*) ((char*) ptr_ + pitch_*l);
        
        for (int i=threadIdx.x; i<block_width+2*ghost_cells; i+=block_width) {
-            const int k = min(bx + i, nx_+2*ghost_cells-1);
+            //Handle periodic boundary conditions here
+            int k = handlePeriodicBoundaryX<block_width, block_height, ghost_cells>(bx + i, nx_, boundary_conditions_);
+            k = min(k, nx_+2*ghost_cells-1);
            
-            /*
-            const int x = bx + i;
-            const int gc_pad = 4;
-            const int x_offset = ( (int) (x < gc_pad) - (int) (x >= nx_+2*ghost_cells-gc_pad) ) * (nx_+2*ghost_cells - 2*gc_pad); 
-            const int k = min(x + x_offset, nx_+2*ghost_cells-1);
-            */
-            
-            shmem[j][i] = row[k];
+            //Read from global memory
+            Q[j][i] = row[k];
        }
    }
+    __syncthreads();
+    
+    //Handle reflective boundary conditions
+    if (getBCNorth(boundary_conditions_) == Reflective) {
+        bcNorthReflective<block_width, block_height, ghost_cells, sign_north_south>(Q, nx_, ny_);
+        __syncthreads();
+    }
+    if (getBCSouth(boundary_conditions_) == Reflective) {
+        bcSouthReflective<block_width, block_height, ghost_cells, sign_north_south>(Q, nx_, ny_);
+        __syncthreads();
+    }
+    if (getBCEast(boundary_conditions_) == Reflective) {
+        bcEastReflective<block_width, block_height, ghost_cells, sign_east_west>(Q, nx_, ny_);
+        __syncthreads();
+    }
+    if (getBCWest(boundary_conditions_) == Reflective) {
+        bcWestReflective<block_width, block_height, ghost_cells, sign_east_west>(Q, nx_, ny_);
+        __syncthreads();
+    }
 }


@ -165,17 +255,6 @@ inline __device__ void writeBlock(float* ptr_, int pitch_,



-template<int block_width, int block_height, int ghost_cells, int scale_east_west=1, int scale_north_south=1>
-__device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells], const int nx_, const int ny_) {
-    bcEastReflective<block_width, block_height, ghost_cells, scale_east_west>(Q, nx_, ny_);
-    bcWestReflective<block_width, block_height, ghost_cells, scale_east_west>(Q, nx_, ny_);
-    __syncthreads();
-    bcNorthReflective<block_width, block_height, ghost_cells, scale_north_south>(Q, nx_, ny_);
-    bcSouthReflective<block_width, block_height, ghost_cells, scale_north_south>(Q, nx_, ny_);
-    __syncthreads();
-}
-
-
 // West boundary
 template<int block_width, int block_height, int ghost_cells, int sign>
 __device__ void bcWestReflective(float Q[block_height+2*ghost_cells][block_width+2*ghost_cells], const int nx_, const int ny_) {
@ -355,50 +434,6 @@ __device__ void memset(float Q[vars][shmem_height][shmem_width], float value) {
 } 


-/**
-  * Returns the step stored in the leftmost 16 bits 
-  * of the 32 bit step-order integer
-  */
-inline __device__ int getStep(int step_order_) {
-    return step_order_ >> 16;
-}
-
-/**
-  * Returns the order stored in the rightmost 16 bits 
-  * of the 32 bit step-order integer
-  */
-inline __device__ int getOrder(int step_order_) {
-    return step_order_ & 0x0000FFFF;
-}
-
-
-enum BoundaryCondition {
-    Dirichlet = 0,
-    Neumann = 1,
-    Periodic = 2,
-    Reflective = 3
-};
-
-inline __device__ BoundaryCondition getBCNorth(int bc_) {
-    return static_cast<BoundaryCondition>(bc_ & 0x000F);
-}
-
-inline __device__ BoundaryCondition getBCSouth(int bc_) {
-    return static_cast<BoundaryCondition>((bc_ >> 8) & 0x000F);
-}
-
-inline __device__ BoundaryCondition getBCEast(int bc_) {
-    return static_cast<BoundaryCondition>((bc_ >> 16) & 0x000F);
-}
-
-inline __device__ BoundaryCondition getBCWest(int bc_) {
-    return static_cast<BoundaryCondition>(bc_ >> 24);
-}
-
-
-
-
-