/**
This OpenCL kernel implements part of the Centered in Time, Centered
in Space (leapfrog) numerical scheme for the shallow water equations,
described in
L. P. Røed, "Documentation of simple ocean models for use in ensemble
predictions", Met no report 2012/3 and 2012/5 .
Copyright (C) 2016 SINTEF ICT
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#define block_height 8
#define block_width 8
typedef __local float eta_shmem[block_height+2][block_width+1];
typedef __local float u_shmem[block_height+2][block_width+2];
typedef __local float v_shmem[block_height+1][block_width+1];
float windStressX(int wind_stress_type_,
float dx_, float dy_, float dt_,
float tau0_, float rho_, float alpha_, float xm_, float Rc_,
float x0_, float y0_,
float u0_, float v0_,
float t_) {
float X = 0.0f;
switch (wind_stress_type_) {
case 0: //UNIFORM_ALONGSHORE
{
const float y = (get_global_id(1)+0.5f)*dy_;
X = tau0_/rho_ * exp(-alpha_*y);
}
break;
case 1: //BELL_SHAPED_ALONGSHORE
if (t_ <= 48.0f*3600.0f) {
const float a = alpha_*((get_global_id(0)+0.5f)*dx_-xm_);
const float aa = a*a;
const float y = (get_global_id(1)+0.5f)*dy_;
X = tau0_/rho_ * exp(-aa) * exp(-alpha_*y);
}
break;
case 2: //MOVING_CYCLONE
{
const float x = (get_global_id(0))*dx_;
const float y = (get_global_id(1)+0.5f)*dy_;
const float a = (x-x0_-u0_*(t_+dt_));
const float aa = a*a;
const float b = (y-y0_-v0_*(t_+dt_));
const float bb = b*b;
const float r = sqrt(aa+bb);
const float c = 1.0f - r/Rc_;
const float xi = c*c;
X = -(tau0_/rho_) * (b/Rc_) * exp(-0.5f*xi);
}
break;
}
return X;
}
/**
* Kernel that evolves U one step in time.
*/
__kernel void computeUKernel(
//Discretization parameters
int nx_, int ny_,
float dx_, float dy_, float dt_,
//Physical parameters
float g_, //< Gravitational constant
float f_, //< Coriolis coefficient
float r1_, //< Inter-layer friction coefficient
float r2_, //< Bottom friction coefficient
//Numerical diffusion
float A_,
//Density of each layer
float rho1_,
float rho2_,
//Data for layer 1
__global float* H1_ptr_, int H1_pitch_,
__global float* eta1_1_ptr_, int eta1_1_pitch_, // eta^n
__global float* U1_0_ptr_, int U1_0_pitch_, // U^n-1, also output, U^n+1
__global float* U1_1_ptr_, int U1_1_pitch_, // U^n
__global float* V1_1_ptr_, int V1_1_pitch_, // V^n
//Data for layer 2
__global float* H2_ptr_, int H2_pitch_,
__global float* eta2_1_ptr_, int eta2_1_pitch_, // eta^n
__global float* U2_0_ptr_, int U2_0_pitch_, // U^n-1, also output, U^n+1
__global float* U2_1_ptr_, int U2_1_pitch_, // U^n
__global float* V2_1_ptr_, int V2_1_pitch_, // V^n
// Wind stress parameters
int wind_stress_type_,
float tau0_, float alpha_, float xm_, float Rc_,
float x0_, float y0_,
float u0_, float v0_,
float t_) {
eta_shmem H1_shared;
eta_shmem eta1_shared;
u_shmem U1_shared;
v_shmem V1_shared;
eta_shmem H2_shared;
eta_shmem eta2_shared;
u_shmem U2_shared;
v_shmem V2_shared;
//Index of thread within block
const int tx = get_local_id(0);
const int ty = get_local_id(1);
//Start of block within domain
const int bx = get_local_size(0) * get_group_id(0) + 1; //Skip global ghost cells
const int by = get_local_size(1) * get_group_id(1) + 1; //Skip global ghost cells
//Index of cell within domain
const int ti = bx + tx;
const int tj = by + ty;
//Compute pointer to current row in the U array
__global float* const U1_0_row = (__global float*) ((__global char*) U1_0_ptr_ + U1_0_pitch_*tj);
__global float* const U2_0_row = (__global float*) ((__global char*) U2_0_ptr_ + U2_0_pitch_*tj);
//Read current U
float U1_0 = 0.0f;
float U2_0 = 0.0f;
if (ti > 0 && ti < nx_ && tj > 0 && tj < ny_+1) {
U1_0 = U1_0_row[ti];
U2_0 = U2_0_row[ti];
}
//Read H and eta into shared memory: (nx+1)*(ny+2) cells
for (int j=ty; j 0 && ti < nx_ && tj > 0 && tj < ny_+1) {
U1_0_row[ti] = U1_2;
U2_0_row[ti] = U2_2;
}
}