/* This CUDA kernel implements the Kurganov-Petrova numerical scheme for the shallow water equations, described in A. Kurganov & Guergana Petrova A Second-Order Well-Balanced Positivity Preserving Central-Upwind Scheme for the Saint-Venant System Communications in Mathematical Sciences, 5 (2007), 133-160. Copyright (C) 2016 SINTEF ICT This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include "common.h" #include "SWECommon.h" #include "limiters.h" __device__ void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], const float g_) { //Index of thread within block const int tx = threadIdx.x; const int ty = threadIdx.y; { int j=ty; const int l = j + 2; //Skip ghost cells for (int i=tx; i( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_); readBlock(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_); readBlock(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_); //Reconstruct slopes along x and y axis minmodSlopeX(Q, Qx, theta_); minmodSlopeY(Q, Qy, theta_); __syncthreads(); //Compute fluxes along the x and y axis computeFluxF(Q, Qx, F, g_); computeFluxG(Q, Qy, G, g_); __syncthreads(); //Sum fluxes and advance in time for all internal cells if (ti > 1 && ti < nx_+2 && tj > 1 && tj < ny_+2) { const int i = tx + 2; //Skip local ghost cells, i.e., +2 const int j = ty + 2; const float h1 = Q[0][j][i] + (F[0][ty][tx] - F[0][ty ][tx+1]) * 
dt_ / dx_ + (G[0][ty][tx] - G[0][ty+1][tx ]) * dt_ / dy_; const float hu1 = Q[1][j][i] + (F[1][ty][tx] - F[1][ty ][tx+1]) * dt_ / dx_ + (G[1][ty][tx] - G[1][ty+1][tx ]) * dt_ / dy_; const float hv1 = Q[2][j][i] + (F[2][ty][tx] - F[2][ty ][tx+1]) * dt_ / dx_ + (G[2][ty][tx] - G[2][ty+1][tx ]) * dt_ / dy_; float* const h_row = (float*) ((char*) h1_ptr_ + h1_pitch_*tj); float* const hu_row = (float*) ((char*) hu1_ptr_ + hu1_pitch_*tj); float* const hv_row = (float*) ((char*) hv1_ptr_ + hv1_pitch_*tj); if (getOrder(step_order_) == 2 && getStep(step_order_) == 1) { //Write to main memory h_row[ti] = 0.5f*(h_row[ti] + h1); hu_row[ti] = 0.5f*(hu_row[ti] + hu1); hv_row[ti] = 0.5f*(hv_row[ti] + hv1); } else { h_row[ti] = h1; hu_row[ti] = hu1; hv_row[ti] = hv1; } } } } //extern "C"