Implemented Rayleigh-Taylor (RT) instability

2025-11-29 17:28:03 +01:00 · 2018-11-07 11:06:45 +01:00
parent 0f68c7867b
commit ae668a40d3
3 changed files with 170 additions and 46 deletions
--- a/GPUSimulators/cuda/EE2D_KP07_dimsplit.cu
+++ b/GPUSimulators/cuda/EE2D_KP07_dimsplit.cu
@@ -59,6 +59,7 @@ void computeFluxF(float Q[4][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],

            // Compute flux based on prediction
            const float4 flux = CentralUpwindFlux(Q_l_bar, Q_r_bar, gamma_);
+            //const float4 flux = HLL_flux(Q_l_bar, Q_r_bar, gamma_);
            
            //Write to shared memory
            F[0][j][i] = flux.x;
@@ -103,6 +104,7 @@ void computeFluxG(float Q[4][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
            
            // Compute flux based on prediction
            const float4 flux = CentralUpwindFlux(Q_l_bar, Q_r_bar, gamma_);
+            //const float4 flux = HLL_flux(Q_l_bar, Q_r_bar, gamma_);
            
            //Write to shared memory
            //Note that we here swap hu and hv back to the original
@@ -162,10 +164,12 @@ __global__ void KP07DimsplitKernel(
    
    //Fix boundary conditions
    noFlowBoundary<w, h, gc,  1,  1>(Q[0], nx_, ny_);
-    noFlowBoundary<w, h, gc,  1,  1>(Q[1], nx_, ny_);
+    noFlowBoundary<w, h, gc, -1,  1>(Q[1], nx_, ny_);
    noFlowBoundary<w, h, gc,  1, -1>(Q[2], nx_, ny_);
    noFlowBoundary<w, h, gc,  1,  1>(Q[3], nx_, ny_);
    __syncthreads();
+    
+    const float g = 0.1f;


    //Step 0 => evolve x first, then y
@@ -182,7 +186,7 @@ __global__ void KP07DimsplitKernel(

        //Set boundary conditions
        noFlowBoundary<w, h, gc,  1,  1>(Q[0], nx_, ny_);
-        noFlowBoundary<w, h, gc,  1,  1>(Q[1], nx_, ny_);
+        noFlowBoundary<w, h, gc, -1,  1>(Q[1], nx_, ny_);
        noFlowBoundary<w, h, gc,  1, -1>(Q[2], nx_, ny_);
        noFlowBoundary<w, h, gc,  1,  1>(Q[3], nx_, ny_);
        __syncthreads();
@@ -196,6 +200,16 @@ __global__ void KP07DimsplitKernel(

        evolveG<w, h, gc, vars>(Q, F, dy_, dt_);
        __syncthreads();    
+        
+        //Gravity source term
+        {
+            const int i = threadIdx.x + gc;
+            const int j = threadIdx.y + gc;
+            const float rho_v = Q[2][j][i];
+            Q[2][j][i] -= g*Q[0][j][i]*dt_;
+            Q[3][j][i] -= g*rho_v*dt_;
+        }
+        __syncthreads();

    }
    //Step 1 => evolve y first, then x
@@ -212,7 +226,7 @@ __global__ void KP07DimsplitKernel(
  
        //Set boundary conditions
        noFlowBoundary<w, h, gc,  1,  1>(Q[0], nx_, ny_);
-        noFlowBoundary<w, h, gc,  1,  1>(Q[1], nx_, ny_);
+        noFlowBoundary<w, h, gc, -1,  1>(Q[1], nx_, ny_);
        noFlowBoundary<w, h, gc,  1, -1>(Q[2], nx_, ny_);
        noFlowBoundary<w, h, gc,  1,  1>(Q[3], nx_, ny_);
        __syncthreads();
@@ -227,6 +241,16 @@ __global__ void KP07DimsplitKernel(
        evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
        __syncthreads();
        
+        //Gravity source term
+        {
+            const int i = threadIdx.x + gc;
+            const int j = threadIdx.y + gc;
+            const float rho_v = Q[2][j][i];
+            Q[2][j][i] -= g*Q[0][j][i]*dt_;
+            Q[3][j][i] -= g*rho_v*dt_;
+        }
+        __syncthreads();
+        
        //This is the RK2-part
        const int tx = threadIdx.x + gc;
        const int ty = threadIdx.y + gc;
--- a/GPUSimulators/cuda/common.h
+++ b/GPUSimulators/cuda/common.h
@@ -102,19 +102,25 @@ inline __device__ void readBlock(float* ptr_, int pitch_,
    //Read into shared memory
    //Loop over all variables
    for (int j=threadIdx.y; j<block_height+2*ghost_cells; j+=block_height) {
-        //const int l = min(by + j, ny_+2*ghost_cells-1);
+        const int l = min(by + j, ny_+2*ghost_cells-1);
+        
+        /*
        const int y = by + j;
        const int y_offset = ( (int) (y < gc_pad) - (int) (y >= ny_+2*ghost_cells-gc_pad) ) * (ny_+2*ghost_cells - 2*gc_pad); 
-        const int l = y + y_offset;
+        const int l = min(y + y_offset, ny_+2*ghost_cells-1);
+        */
+        
        float* row = (float*) ((char*) ptr_  + pitch_*l);
        
        for (int i=threadIdx.x; i<block_width+2*ghost_cells; i+=block_width) {
-            //const int k = min(bx + i, nx_+2*ghost_cells-1);
+            const int k = min(bx + i, nx_+2*ghost_cells-1);
            
+            /*
            const int x = bx + i;
            const int gc_pad = 4;
            const int x_offset = ( (int) (x < gc_pad) - (int) (x >= nx_+2*ghost_cells-gc_pad) ) * (nx_+2*ghost_cells - 2*gc_pad); 
-            const int k = x + x_offset;
+            const int k = min(x + x_offset, nx_+2*ghost_cells-1);
+            */
            
            shmem[j][i] = row[k];
        }