Implemented proper boundary conditions handling

2026-01-14 15:48:43 +01:00 · 2018-11-08 22:07:31 +01:00
parent fd337e7d53
commit bdf7b4292c
5 changed files with 401229 additions and 456 deletions
--- a/GPUSimulators/cuda/EE2D_KP07_dimsplit.cu
+++ b/GPUSimulators/cuda/EE2D_KP07_dimsplit.cu
@@ -156,17 +156,11 @@ __global__ void KP07DimsplitKernel(
    __shared__ float  F[4][h+4][w+4];
    
    //Read into shared memory
-    readBlock<w, h, gc>(  rho0_ptr_,   rho0_pitch_, Q[0], nx_, ny_);
-    readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_);
-    readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_);
-    readBlock<w, h, gc>(    E0_ptr_,     E0_pitch_, Q[3], nx_, ny_);
+    readBlock<w, h, gc,  1,  1>(  rho0_ptr_,   rho0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
+    readBlock<w, h, gc,  1, -1>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
+    readBlock<w, h, gc, -1,  1>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
+    readBlock<w, h, gc,  1,  1>(    E0_ptr_,     E0_pitch_, Q[3], nx_, ny_, boundary_conditions_);
    __syncthreads();
-    
-    //Fix boundary conditions
-    noFlowBoundary<w, h, gc,  1,  1>(Q[0], nx_, ny_);
-    noFlowBoundary<w, h, gc, -1,  1>(Q[1], nx_, ny_);
-    noFlowBoundary<w, h, gc,  1, -1>(Q[2], nx_, ny_);
-    noFlowBoundary<w, h, gc,  1,  1>(Q[3], nx_, ny_);


    //Step 0 => evolve x first, then y
@@ -185,7 +179,6 @@ __global__ void KP07DimsplitKernel(
        minmodSlopeY<w, h, gc, vars>(Q, Qx, theta_);
        __syncthreads();

-        
        computeFluxG(Q, Qx, F, gamma_, dy_, dt_);
        __syncthreads();

@@ -236,24 +229,21 @@ __global__ void KP07DimsplitKernel(
        
        //This is the RK2-part
        if (getOrder(step_order_) == 2) {
-            const int tx = threadIdx.x + gc;
-            const int ty = threadIdx.y + gc;
-            const float q1 = Q[0][ty][tx];
-            const float q2 = Q[1][ty][tx];
-            const float q3 = Q[2][ty][tx];
-            const float q4 = Q[3][ty][tx];
-            __syncthreads();
+            const int i = threadIdx.x + gc;
+            const int j = threadIdx.y + gc;
+            const int tx = blockDim.x*blockIdx.x + i;
+            const int ty = blockDim.y*blockIdx.y + j;
            
-            readBlock<w, h, gc>(  rho1_ptr_,   rho1_pitch_, Q[0], nx_, ny_);
-            readBlock<w, h, gc>(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_);
-            readBlock<w, h, gc>(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_);
-            readBlock<w, h, gc>(    E1_ptr_,     E1_pitch_, Q[3], nx_, ny_);
-            __syncthreads();
+            const float q1 = ((float*) ((char*)   rho1_ptr_ +   rho1_pitch_*ty))[tx];
+            const float q2 = ((float*) ((char*) rho_u1_ptr_ + rho_u1_pitch_*ty))[tx];
+            const float q3 = ((float*) ((char*) rho_v1_ptr_ + rho_v1_pitch_*ty))[tx];
+            const float q4 = ((float*) ((char*)     E1_ptr_ +     E1_pitch_*ty))[tx];
            
-            Q[0][ty][tx] = 0.5f*( Q[0][ty][tx] + q1 );
-            Q[1][ty][tx] = 0.5f*( Q[1][ty][tx] + q2 );
-            Q[2][ty][tx] = 0.5f*( Q[2][ty][tx] + q3 );
-            Q[3][ty][tx] = 0.5f*( Q[3][ty][tx] + q4 );
+            Q[0][j][i] = 0.5f*( Q[0][j][i] + q1 );
+            Q[1][j][i] = 0.5f*( Q[1][j][i] + q2 );
+            Q[2][j][i] = 0.5f*( Q[2][j][i] + q3 );
+            Q[3][j][i] = 0.5f*( Q[3][j][i] + q4 );
+            __syncthreads();
        }        
    }