/* This kernel implements the Central Upwind flux function to solve the Euler equations Copyright (C) 2018 SINTEF Digital This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "common.h" #include "EulerCommon.h" #include "limiters.h" __device__ void computeFluxF(float Q[4][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], float Qx[4][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], float F[4][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], const float gamma_, const float dx_, const float dt_) { for (int j=threadIdx.y; j( rho0_ptr_, rho0_pitch_, Q[0], nx_, ny_); readBlock(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_); readBlock(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_); readBlock( E0_ptr_, E0_pitch_, Q[3], nx_, ny_); __syncthreads(); //Fix boundary conditions noFlowBoundary(Q[0], nx_, ny_); noFlowBoundary(Q[1], nx_, ny_); noFlowBoundary(Q[2], nx_, ny_); noFlowBoundary(Q[3], nx_, ny_); __syncthreads(); //Step 0 => evolve x first, then y if (step_ == 0) { //Compute fluxes along the x axis and evolve minmodSlopeX(Q, Qx, theta_); __syncthreads(); computeFluxF(Q, Qx, F, gamma_, dx_, dt_); __syncthreads(); evolveF(Q, F, dx_, dt_); __syncthreads(); //Set boundary conditions noFlowBoundary(Q[0], nx_, ny_); noFlowBoundary(Q[1], nx_, ny_); noFlowBoundary(Q[2], nx_, ny_); noFlowBoundary(Q[3], nx_, ny_); __syncthreads(); //Compute fluxes along the y axis and evolve minmodSlopeY(Q, Qx, theta_); __syncthreads(); computeFluxG(Q, Qx, F, gamma_, dy_, dt_); __syncthreads(); evolveG(Q, F, dy_, dt_); __syncthreads(); } //Step 1 => evolve y first, then x else { //Compute fluxes along the y axis and evolve minmodSlopeY(Q, Qx, theta_); __syncthreads(); computeFluxG(Q, Qx, F, gamma_, dy_, dt_); __syncthreads(); evolveG(Q, F, dy_, dt_); __syncthreads(); //Set boundary conditions noFlowBoundary(Q[0], nx_, ny_); noFlowBoundary(Q[1], nx_, ny_); noFlowBoundary(Q[2], nx_, ny_); noFlowBoundary(Q[3], nx_, ny_); __syncthreads(); //Compute fluxes along the x axis and evolve minmodSlopeX(Q, Qx, theta_); __syncthreads(); computeFluxF(Q, Qx, F, gamma_, dx_, dt_); __syncthreads(); evolveF(Q, F, dx_, dt_); __syncthreads(); //This is the RK2-part const int tx = threadIdx.x + gc; const int ty = threadIdx.y + gc; const float q1 = Q[0][ty][tx]; const float q2 = Q[1][ty][tx]; const float q3 = Q[2][ty][tx]; const float q4 = Q[3][ty][tx]; __syncthreads(); readBlock( rho1_ptr_, rho1_pitch_, Q[0], nx_, ny_); readBlock(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_); readBlock(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_); readBlock( E1_ptr_, E1_pitch_, Q[3], nx_, ny_); __syncthreads(); Q[0][ty][tx] = 0.5f*( Q[0][ty][tx] + q1 ); Q[1][ty][tx] = 0.5f*( Q[1][ty][tx] + q2 ); Q[2][ty][tx] = 0.5f*( Q[2][ty][tx] + q3 ); Q[3][ty][tx] = 0.5f*( Q[3][ty][tx] + q4 ); } // Write to main memory for all internal cells writeBlock( rho1_ptr_, rho1_pitch_, Q[0], nx_, ny_); writeBlock(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_); writeBlock(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_); writeBlock( E1_ptr_, E1_pitch_, Q[3], nx_, ny_); } } // extern "C"