mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 06:24:13 +02:00
222 lines
8.0 KiB
Plaintext
222 lines
8.0 KiB
Plaintext
/**
This OpenCL kernel implements part of the Centered in Time, Centered
in Space (leapfrog) numerical scheme for the shallow water equations,
described in
L. P. Røed, "Documentation of simple ocean models for use in ensemble
predictions", Met no report 2012/3 and 2012/5.

Copyright (C) 2016 SINTEF ICT

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
|
|
|
|
#include "common.opencl"
|
|
|
|
|
|
|
|
/**
  * Kernel that evolves V one step in time.
  *
  * Each work-item handles one cell (ti, tj). The work-group first stages the
  * tiles of H, eta^n, U^n and V^n it needs (including a one-cell halo) in
  * local memory, then every work-item computes V^n+1 from V^n-1 and the
  * staged values, and interior cells write the result back over V^n-1.
  *
  * All *_pitch_ arguments are row strides in BYTES (rows are addressed
  * through a char pointer), not in elements.
  *
  * NOTE(review): block_width, block_height and windStressY() are not defined
  * in this file; presumably they come from "common.opencl" or from build
  * options. The launch configuration is assumed to match
  * block_width x block_height - confirm against the host code.
  *
  * @param nx_, ny_            Discretization parameters (domain size in cells)
  * @param dx_, dy_, dt_       Discretization parameters (cell size, time step)
  * @param g_                  Gravitational constant
  * @param f_                  Coriolis coefficient
  * @param r_                  Bottom friction coefficient
  * @param A_                  Numerical diffusion coefficient (multiplies the eddy viscosity term)
  * @param H_ptr_, H_pitch_    H, read only
  * @param eta1_ptr_, ...      eta^n, read only
  * @param U1_ptr_, ...        U^n, read only
  * @param V0_ptr_, ...        V^n-1 on input, overwritten with V^n+1
  * @param V1_ptr_, ...        V^n, read only
  * @param wind_stress_type_ and the remaining arguments
  *                            Forwarded unchanged to windStressY()
  */
__kernel void computeVKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Numerical diffusion
        float A_,

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* eta1_ptr_, int eta1_pitch_, // eta^n
        __global float* U1_ptr_, int U1_pitch_, // U^n
        __global float* V0_ptr_, int V0_pitch_, // V^n-1, also output V^n+1
        __global float* V1_ptr_, int V1_pitch_, // V^n

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Local-memory tiles; the extra rows/columns hold the halo needed by the stencils
    __local float H_shared[block_height+1][block_width+2];
    __local float eta1_shared[block_height+1][block_width+2];
    __local float U1_shared[block_height+1][block_width+1];
    __local float V1_shared[block_height+2][block_width+2];

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Start of block within domain
    const int bx = get_local_size(0) * get_group_id(0) + 1; //Skip global ghost cells
    const int by = get_local_size(1) * get_group_id(1) + 1; //Skip global ghost cells

    //Index of cell within domain
    const int ti = bx + tx;
    const int tj = by + ty;

    //Only these cells read V^n-1 and write V^n+1; all other work-items still
    //take part in the cooperative loads and the barrier below.
    const int is_internal = (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_);

    //Compute pointer to current row in the V array
    __global float* const V0_row = (__global float*) ((__global char*) V0_ptr_ + V0_pitch_*tj);

    //Read current V
    float V0 = 0.0f;
    if (is_internal) {
        V0 = V0_row[ti];
    }

    //Read H and eta into shared memory: (nx+2)*(ny+1) cells
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        // "fake" global ghost cells by clamping
        const int l = clamp(by + j, 1, ny_);

        //Compute the pointer to current row in the H and eta arrays
        __global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
        __global float* const eta1_row = (__global float*) ((__global char*) eta1_ptr_ + eta1_pitch_*l);

        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            // "fake" global ghost cells by clamping
            const int k = clamp(bx + i - 1, 1, nx_);

            H_shared[j][i] = H_row[k];
            eta1_shared[j][i] = eta1_row[k];
        }
    }

    //Read U into shared memory: (nx+1)*(ny+1) cells
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        // "fake" ghost cells by clamping
        const int l = clamp(by + j, 1, ny_);

        //Compute the pointer to current row in the U array
        __global float* const U1_row = (__global float*) ((__global char*) U1_ptr_ + U1_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // Prevent out-of-bounds
            const int k = clamp(bx + i - 1, 0, nx_);

            U1_shared[j][i] = U1_row[k];
        }
    }

    //Read V into shared memory: (nx+2)*(ny+2) cells
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // Prevent out-of-bounds
        const int l = clamp(by + j - 1, 0, ny_);

        //Compute the pointer to current row in the V array
        __global float* const V1_row = (__global float*) ((__global char*) V1_ptr_ + V1_pitch_*l);

        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            // "fake" ghost cells by clamping
            const int k = clamp(bx + i - 1, 1, nx_);

            V1_shared[j][i] = V1_row[k];
        }
    }

    //Make sure all threads have read into shared mem
    barrier(CLK_LOCAL_MEM_FENCE);

    /**
      * Now get all our required variables as short-hands
      * here we use the notation of
      *  Var_00 as var_i,j
      *  Var_p0 as var_i+1,j
      *  Var_0m as var_i,j-1
      *  etc
      */
    const float V_00 = V1_shared[ty+1][tx+1]; //V at "center"
    const float V_0p = V1_shared[ty+2][tx+1]; //V at "north"
    const float V_0m = V1_shared[ty  ][tx+1]; //V at "south"
    const float V_p0 = V1_shared[ty+1][tx+2]; //V at "east"
    const float V_m0 = V1_shared[ty+1][tx  ]; //V at "west"

    const float U_00 = U1_shared[ty  ][tx+1];
    const float U_0p = U1_shared[ty+1][tx+1];
    const float U_m0 = U1_shared[ty  ][tx  ];
    const float U_mp = U1_shared[ty+1][tx  ];

    const float H_m0 = H_shared[ty  ][tx  ];
    const float H_00 = H_shared[ty  ][tx+1];
    const float H_p0 = H_shared[ty  ][tx+2];
    const float H_mp = H_shared[ty+1][tx  ];
    const float H_0p = H_shared[ty+1][tx+1];
    const float H_pp = H_shared[ty+1][tx+2];

    const float eta_m0 = eta1_shared[ty  ][tx  ];
    const float eta_00 = eta1_shared[ty  ][tx+1];
    const float eta_p0 = eta1_shared[ty  ][tx+2];
    const float eta_mp = eta1_shared[ty+1][tx  ];
    const float eta_0p = eta1_shared[ty+1][tx+1];
    const float eta_pp = eta1_shared[ty+1][tx+2];

    //Reconstruct H_bar and H_y (at the V position)
    const float H_bar_m0 = 0.25f*(H_m0 + H_mp + H_00 + H_0p);
    const float H_bar_00 = 0.25f*(H_00 + H_0p + H_p0 + H_pp);
    const float H_y = 0.5f*(H_00 + H_0p);

    //Reconstruct Eta_bar at the V position
    const float eta_bar_m0 = 0.25f*(eta_m0 + eta_mp + eta_00 + eta_0p);
    const float eta_bar_00 = 0.25f*(eta_00 + eta_0p + eta_p0 + eta_pp);

    //Reconstruct U at the V position
    const float U_bar = 0.25f*(U_m0 + U_00 + U_mp + U_0p);

    //Calculate the friction coefficient.
    //Float-suffixed literals keep the whole expression in single precision;
    //an unsuffixed 1.0 is a double literal and would promote the arithmetic.
    const float C = 1.0f + 2.0f*r_*dt_/H_y + 2.0f*A_*dt_*(dx_*dx_ + dy_*dy_)/(dx_*dx_*dy_*dy_);

    //Calculate the pressure/gravitational effect
    const float h_0p = H_0p + eta_0p;
    const float h_00 = H_00 + eta_00;
    const float h_y = 0.5f*(h_00 + h_0p); //Could possibly use h for pressure terms instead of H
    const float P_y_hat = -0.5f*g_*(eta_0p*eta_0p - eta_00*eta_00);
    const float P_y = -g_*h_y*(eta_0p - eta_00) + P_y_hat;

    //Calculate nonlinear effects
    const float N_a = (V_0p + V_00)*(V_0p + V_00) / h_0p;
    const float N_b = (V_00 + V_0m)*(V_00 + V_0m) / h_00;
    const float N_c = (U_0p + U_00)*(V_p0 + V_00) / (H_bar_00 + eta_bar_00);
    const float N_d = (U_mp + U_m0)*(V_00 + V_m0) / (H_bar_m0 + eta_bar_m0);
    const float N = 0.25f*( N_a - N_b + (dy_/dx_)*(N_c - N_d) );

    //Calculate eddy viscosity term.
    //The center value is V0 (V^n-1) rather than V_00 (V^n); the A_ term in C
    //above carries the matching V^n+1 contribution.
    //NOTE(review): looks like a DuFort-Frankel-style treatment - confirm against the Roed report.
    const float E = (V_p0 - V0 + V_m0)/(dx_*dx_) + (V_0p - V0 + V_0m)/(dy_*dy_);

    //Calculate the wind shear stress
    const float Y = windStressY(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute the V at the next timestep
    const float V2 = (V0 + 2.0f*dt_*(-f_*U_bar + (N + P_y)/dy_ + Y + A_*E) ) / C;

    //Write to main memory for internal cells
    if (is_internal) {
        V0_row[ti] = V2;
    }
}