/* This OpenCL kernel implements the Kurganov-Petrova numerical scheme for the shallow water equations, described in A. Kurganov & Guergana Petrova A Second-Order Well-Balanced Positivity Preserving Central-Upwind Scheme for the Saint-Venant System Communications in Mathematical Sciences, 5 (2007), 133-160. Copyright (C) 2016 SINTEF ICT This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include "common.opencl" float3 CDKLM16_F_func(const float3 Q, const float g) { float3 F; F.x = Q.x*Q.y; //h*u F.y = Q.x*Q.y*Q.y + 0.5f*g*Q.x*Q.x; //h*u*u + 0.5f*g*h*h; F.z = Q.x*Q.y*Q.z; //h*u*v; return F; } /** * Note that the input vectors are (h, u, v), thus not the regular * (h, hu, hv) */ float3 CDKLM16_flux(const float3 Qm, float3 Qp, const float g) { const float3 Fp = CDKLM16_F_func(Qp, g); const float up = Qp.y; // u const float cp = sqrt(g*Qp.x); // sqrt(g*h) const float3 Fm = CDKLM16_F_func(Qm, g); const float um = Qm.y; // u const float cm = sqrt(g*Qm.x); // sqrt(g*h) const float am = min(min(um-cm, up-cp), 0.0f); // largest negative wave speed const float ap = max(max(um+cm, up+cp), 0.0f); // largest positive wave speed float3 F; F.x = ((ap*Fm.x - am*Fp.x) + ap*am*(Qp.x-Qm.x))/(ap-am); F.y = ((ap*Fm.y - am*Fp.y) + ap*am*(Qp.y-Qm.y))/(ap-am); F.z = (Qm.y + Qp.y > 0) ? Fm.z : Fp.z; //Upwinding to be consistent return F; } __kernel void swe_2D( int nx_, int ny_, float dx_, float dy_, float dt_, float g_, float theta_, float f_, //< Coriolis coefficient float r_, //< Bottom friction coefficient int step_, //Input h^n __global float* h0_ptr_, int h0_pitch_, __global float* hu0_ptr_, int hu0_pitch_, __global float* hv0_ptr_, int hv0_pitch_, //Output h^{n+1} __global float* h1_ptr_, int h1_pitch_, __global float* hu1_ptr_, int hu1_pitch_, __global float* hv1_ptr_, int hv1_pitch_, //Wind stress parameters int wind_stress_type_, float tau0_, float rho_, float alpha_, float xm_, float Rc_, float x0_, float y0_, float u0_, float v0_, float t_) { //Index of thread within block const int tx = get_local_id(0); const int ty = get_local_id(1); //Index of block within domain const int bx = get_local_size(0) * get_group_id(0); const int by = get_local_size(1) * get_group_id(1); //Index of cell within domain const int ti = get_global_id(0) + 3; //Skip global ghost cells, i.e., +3 const int tj = get_global_id(1) + 3; // Our physical variables __local float R[3][block_height+6][block_width+6]; // Our reconstruction variables __local float Q[4][block_height+4][block_width+4]; __local float Qx[4][block_height][block_width+2]; __local float Qy[4][block_height+2][block_width]; // Our fluxes __local float F[3][block_height][block_width+1]; __local float G[3][block_height+1][block_width]; //Read into shared memory for (int j=ty; j 2 && ti < nx_+3 && tj > 2 && tj < ny_+3) { const int i = tx + 3; //Skip local ghost cells, i.e., +2 const int j = ty + 3; const float X = windStressX( wind_stress_type_, dx_, dy_, dt_, tau0_, rho_, alpha_, xm_, Rc_, x0_, y0_, u0_, v0_, t_); const float Y = windStressY( wind_stress_type_, dx_, dy_, dt_, tau0_, rho_, alpha_, xm_, Rc_, x0_, y0_, u0_, v0_, t_); const float h1 = R[0][j][i] + (F[0][ty][tx] - F[0][ty ][tx+1]) * dt_ / dx_ + (G[0][ty][tx] - G[0][ty+1][tx ]) * dt_ / dy_; const float hu1 = R[1][j][i] + (F[1][ty][tx] - F[1][ty ][tx+1]) * dt_ / dx_ + (G[1][ty][tx] - G[1][ty+1][tx ]) * dt_ / dy_ + dt_*X - dt_*f_*R[2][j][i]; const float hv1 = R[2][j][i] + (F[2][ty][tx] - F[2][ty ][tx+1]) * dt_ / dx_ + (G[2][ty][tx] - G[2][ty+1][tx ]) * dt_ / dy_ + dt_*Y + dt_*f_*R[1][j][i]; __global float* const h_row = (__global float*) ((__global char*) h1_ptr_ + h1_pitch_*tj); __global float* const hu_row = (__global float*) ((__global char*) hu1_ptr_ + hu1_pitch_*tj); __global float* const hv_row = (__global float*) ((__global char*) hv1_ptr_ + hv1_pitch_*tj); const float C = 2.0f*r_*dt_/R[0][j][i]; if (step_ == 0) { //First step of RK2 ODE integrator h_row[ti] = h1; hu_row[ti] = hu1 / (1.0f + C); hv_row[ti] = hv1 / (1.0f + C); } else if (step_ == 1) { //Second step of RK2 ODE integrator //First read Q^n const float h_a = h_row[ti]; const float hu_a = hu_row[ti]; const float hv_a = hv_row[ti]; //Compute Q^n+1 const float h_b = 0.5f*(h_a + h1); const float hu_b = 0.5f*(hu_a + hu1); const float hv_b = 0.5f*(hv_a + hv1); //Write to main memory h_row[ti] = h_b; hu_row[ti] = hu_b / (1.0f + 0.5f*C); hv_row[ti] = hv_b / (1.0f + 0.5f*C); } } }