mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-12-23 12:58:43 +01:00
Added initial version of SW code
This commit is contained in:
203
SWESimulators/CDKLM16.py
Normal file
203
SWESimulators/CDKLM16.py
Normal file
@@ -0,0 +1,203 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements
|
||||
Alina Chertock, Michael Dudzinski, A. Kurganov & Maria Lukacova-Medvidova (2016)
|
||||
Well-Balanced Schemes for the Shallow Water Equations with Coriolis Forces
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the CDKLM16 scheme (Chertock, Dudzinski, Kurganov & Lukacova-Medvidova, 2016)
|
||||
"""
|
||||
class CDKLM16:
    """
    Shallow water simulator that drives the swe_2D kernel found in
    CDKLM16_kernel.opencl (scheme of Chertock, Dudzinski, Kurganov &
    Lukacova-Medvidova, 2016).
    """

    def __init__(self,
                 cl_ctx,
                 h0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r,
                 theta=1.3, use_rk2=True,
                 wind_stress=None,
                 block_width=16, block_height=16):
        """
        Initialization routine
        cl_ctx: OpenCL context used for the command queue, kernel and buffers
        h0: Water depth incl ghost cells
        hu0: Initial momentum along x-axis incl ghost cells
        hv0: Initial momentum along y-axis incl ghost cells
            NOTE(review): three ghost cells per side are requested from
            Common.SWEDataArkawaA below, so the arrays are presumably
            (ny+6) x (nx+6); the previous docstring said (nx+1)*(ny+1).
            Confirm against Common.
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1)
        r: Bottom friction coefficient (2.4e-3 m/s)
        theta: Parameter forwarded to the kernel's minmod slope limiter
        use_rk2: If True every timestep launches the kernel twice (two-stage
            Runge-Kutta), otherwise once followed by a buffer swap
        wind_stress: Wind stress parameters. None (the default) creates a
            fresh Common.WindStressParams() for this simulator
        block_width: OpenCL work-group size along x
        block_height: OpenCL work-group size along y
        """
        self.cl_ctx = cl_ctx

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.kernel = Common.get_kernel(self.cl_ctx, "CDKLM16_kernel.opencl", block_width, block_height)

        #Create data by uploading to device
        ghost_cells_x = 3
        ghost_cells_y = 3
        self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)

        #A default written in the signature would be created once, when the
        #class is defined, and then shared by every simulator instance.
        if wind_stress is None:
            wind_stress = Common.WindStressParams()

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.f = np.float32(f)
        self.r = np.float32(r)
        self.theta = np.float32(theta)
        self.use_rk2 = use_rk2
        self.wind_stress = wind_stress

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        #(global size = domain size rounded up to a whole number of blocks)
        self.local_size = (block_width, block_height)
        self.global_size = (
            int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]),
            int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1])
        )

    def _launch_swe_2D(self, local_dt, rk_step, swap_buffers=False):
        """
        Enqueues one swe_2D kernel launch.
        local_dt: Timestep for this launch (np.float32)
        rk_step: Value of the kernel's step_ argument (0 or 1)
        swap_buffers: False reads the *0 buffers and writes the *1 buffers,
            True reads the *1 buffers and writes the *0 buffers
        """
        data = self.cl_data
        if swap_buffers:
            src = (data.h1, data.hu1, data.hv1)
            dst = (data.h0, data.hu0, data.hv0)
        else:
            src = (data.h0, data.hu0, data.hv0)
            dst = (data.h1, data.hu1, data.hv1)
        wind = self.wind_stress

        self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size,
                           self.nx, self.ny,
                           self.dx, self.dy, local_dt,
                           self.g,
                           self.theta,
                           self.f,
                           self.r,
                           np.int32(rk_step),
                           src[0].data, src[0].pitch,
                           src[1].data, src[1].pitch,
                           src[2].data, src[2].pitch,
                           dst[0].data, dst[0].pitch,
                           dst[1].data, dst[1].pitch,
                           dst[2].data, dst[2].pitch,
                           wind.type,
                           wind.tau0, wind.rho, wind.alpha, wind.xm, wind.Rc,
                           wind.x0, wind.y0,
                           wind.u0, wind.v0,
                           self.t)

    def step(self, t_end=0.0):
        """
        Function which steps the simulation t_end time units forward, using
        timesteps of size self.dt and a shorter last timestep when t_end is
        not a multiple of self.dt. Returns the simulation time afterwards.
        """
        n = int(t_end / self.dt + 1)

        for i in range(0, n):
            local_dt = np.float32(min(self.dt, t_end-i*self.dt))

            if (local_dt <= 0.0):
                break

            if (self.use_rk2):
                #Stage one writes into the *1 buffers, stage two reads them
                #and writes the result back into the *0 buffers, so the
                #buffers must not be swapped afterwards
                self._launch_swe_2D(local_dt, 0)
                self._launch_swe_2D(local_dt, 1, swap_buffers=True)
            else:
                self._launch_swe_2D(local_dt, 0)
                self.cl_data.swap()

            self.t += local_dt

        return self.t

    def get_kernel(self, kernel_filename):
        """
        Function which reads a text file located next to this module and
        builds an OpenCL program from it using self.cl_ctx.
        Unlike Common.get_kernel (used by __init__) no block size is passed on.
        """
        #os is not among the top-level imports of this module
        import os

        #Read the proper program
        module_path = os.path.dirname(os.path.realpath(__file__))
        fullpath = os.path.join(module_path, kernel_filename)
        with open(fullpath, "r") as kernel_file:
            kernel_string = kernel_file.read()
            kernel = cl.Program(self.cl_ctx, kernel_string).build()

        return kernel

    def download(self):
        """
        Returns whatever self.cl_data.download produces for our command queue
        (presumably the simulation state copied back to the host).
        """
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
440
SWESimulators/CDKLM16_kernel.opencl
Normal file
440
SWESimulators/CDKLM16_kernel.opencl
Normal file
@@ -0,0 +1,440 @@
|
||||
/*
|
||||
This OpenCL kernel implements the Kurganov-Petrova numerical scheme
|
||||
for the shallow water equations, described in
|
||||
A. Kurganov & Guergana Petrova
|
||||
A Second-Order Well-Balanced Positivity Preserving Central-Upwind
|
||||
Scheme for the Saint-Venant System Communications in Mathematical
|
||||
Sciences, 5 (2007), 133-160.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
|
||||
/**
  * Evaluates the physical flux along the first velocity component.
  * Q holds (h, u, v) - velocities, not momenta - and g is the
  * gravitational acceleration.
  * Returns (h*u, h*u*u + 0.5*g*h*h, h*u*v).
  */
float3 CDKLM16_F_func(const float3 Q, const float g) {
    const float hu = Q.x*Q.y;                 //h*u, shared by all three components

    return (float3)(hu,                       //h*u
                    hu*Q.y + 0.5f*g*Q.x*Q.x,  //h*u*u + 0.5f*g*h*h
                    hu*Q.z);                  //h*u*v
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Central-upwind numerical flux across one cell interface.
  *
  * Note that the input vectors are (h, u, v), thus not the regular
  * (h, hu, hv)
  *
  * Qm: state reconstructed at the interface from the left/lower cell
  * Qp: state reconstructed at the interface from the right/upper cell
  * g:  gravitational acceleration
  *
  * The first two components are blended using the one-sided wave speeds
  * am <= 0 <= ap, the third component is upwinded on the sign of um + up.
  */
float3 CDKLM16_flux(const float3 Qm, const float3 Qp, const float g) {
    const float3 Fp = CDKLM16_F_func(Qp, g);
    const float up = Qp.y;         // u
    const float cp = sqrt(g*Qp.x); // sqrt(g*h)

    const float3 Fm = CDKLM16_F_func(Qm, g);
    const float um = Qm.y;         // u
    const float cm = sqrt(g*Qm.x); // sqrt(g*h)

    const float am = min(min(um-cm, up-cp), 0.0f); // largest negative wave speed
    const float ap = max(max(um+cm, up+cp), 0.0f); // largest positive wave speed

    // Since am <= 0 <= ap, ap == am implies that both are zero, i.e., that
    // u == 0 and h == 0 on both sides. Every flux component is then exactly
    // zero, whereas the division below would evaluate 0/0 = NaN.
    if (ap == am) {
        return (float3)(0.0f, 0.0f, 0.0f);
    }

    float3 F;

    F.x = ((ap*Fm.x - am*Fp.x) + ap*am*(Qp.x-Qm.x))/(ap-am);
    F.y = ((ap*Fm.y - am*Fp.y) + ap*am*(Qp.y-Qm.y))/(ap-am);
    F.z = (Qm.y + Qp.y > 0) ? Fm.z : Fp.z; //Upwinding to be consistent

    return F;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Advances (h, hu, hv) one stage in time.
  *
  * Each work-group loads a tile of the input buffers, including a halo of
  * three cells on every side, into local memory, mirrors the tile across
  * the domain boundaries, builds the reconstruction variables (u, v, K, L),
  * limits their slopes, evaluates the interface fluxes and finally updates
  * the interior cell owned by each work-item.
  *
  * step_ == 0: the update is written to the output buffers.
  * step_ == 1: the update is averaged with the values already stored in the
  *             output buffers (second stage of the RK2 integrator).
  *
  * block_width, block_height, minmodSlope, windStressX and windStressY are
  * not defined in this file; presumably they come from the build options
  * and from common.opencl.
  */
__kernel void swe_2D(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,

        float theta_,

        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        int step_,

        //Input h^n
        __global float* h0_ptr_, int h0_pitch_,
        __global float* hu0_ptr_, int hu0_pitch_,
        __global float* hv0_ptr_, int hv0_pitch_,

        //Output h^{n+1}
        __global float* h1_ptr_, int h1_pitch_,
        __global float* hu1_ptr_, int hu1_pitch_,
        __global float* hv1_ptr_, int hv1_pitch_,

        //Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Index of block within domain
    const int bx = get_local_size(0) * get_group_id(0);
    const int by = get_local_size(1) * get_group_id(1);

    //Index of cell within domain
    const int ti = get_global_id(0) + 3; //Skip global ghost cells, i.e., +3
    const int tj = get_global_id(1) + 3;

    // Our physical variables (h, hu, hv), with a halo of three cells
    __local float R[3][block_height+6][block_width+6];

    // Our reconstruction variables (u, v, K, L) and their limited slopes
    __local float Q[4][block_height+4][block_width+4];
    __local float Qx[4][block_height][block_width+2];
    __local float Qy[4][block_height+2][block_width];

    // Our fluxes
    __local float F[3][block_height][block_width+1];
    __local float G[3][block_height+1][block_width];


    //Read into shared memory
    for (int j=ty; j<block_height+6; j+=get_local_size(1)) {
        const int l = clamp(by + j, 0, ny_+5); // Out of bounds

        //Compute the pointer to current row in the arrays
        __global float* const h_row = (__global float*) ((__global char*) h0_ptr_ + h0_pitch_*l);
        __global float* const hu_row = (__global float*) ((__global char*) hu0_ptr_ + hu0_pitch_*l);
        __global float* const hv_row = (__global float*) ((__global char*) hv0_ptr_ + hv0_pitch_*l);

        for (int i=tx; i<block_width+6; i+=get_local_size(0)) {
            const int k = clamp(bx + i, 0, nx_+5); // Out of bounds

            R[0][j][i] = h_row[k];
            R[1][j][i] = hu_row[k];
            R[2][j][i] = hv_row[k];
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);


    //Fix boundary conditions
    //The three ghost cells are mirror images of the three outermost interior
    //cells, with the momentum component normal to the boundary negated.
    //Only the row/column of the work-item's own cell is mirrored here.
    {
        const int i = tx + 3; //Skip local ghost cells, i.e., +3
        const int j = ty + 3;

        if (ti == 3) {
            R[0][j][i-1] =  R[0][j][i];
            R[1][j][i-1] = -R[1][j][i];
            R[2][j][i-1] =  R[2][j][i];

            R[0][j][i-2] =  R[0][j][i+1];
            R[1][j][i-2] = -R[1][j][i+1];
            R[2][j][i-2] =  R[2][j][i+1];

            R[0][j][i-3] =  R[0][j][i+2];
            R[1][j][i-3] = -R[1][j][i+2];
            R[2][j][i-3] =  R[2][j][i+2];
        }
        if (ti == nx_+2) {
            R[0][j][i+1] =  R[0][j][i];
            R[1][j][i+1] = -R[1][j][i];
            R[2][j][i+1] =  R[2][j][i];

            R[0][j][i+2] =  R[0][j][i-1];
            R[1][j][i+2] = -R[1][j][i-1];
            R[2][j][i+2] =  R[2][j][i-1];

            R[0][j][i+3] =  R[0][j][i-2];
            R[1][j][i+3] = -R[1][j][i-2];
            R[2][j][i+3] =  R[2][j][i-2];
        }
        if (tj == 3) {
            R[0][j-1][i] =  R[0][j][i];
            R[1][j-1][i] =  R[1][j][i];
            R[2][j-1][i] = -R[2][j][i];

            R[0][j-2][i] =  R[0][j+1][i];
            R[1][j-2][i] =  R[1][j+1][i];
            R[2][j-2][i] = -R[2][j+1][i];

            R[0][j-3][i] =  R[0][j+2][i];
            R[1][j-3][i] =  R[1][j+2][i];
            R[2][j-3][i] = -R[2][j+2][i];
        }
        if (tj == ny_+2) {
            R[0][j+1][i] =  R[0][j][i];
            R[1][j+1][i] =  R[1][j][i];
            R[2][j+1][i] = -R[2][j][i];

            R[0][j+2][i] =  R[0][j-1][i];
            R[1][j+2][i] =  R[1][j-1][i];
            R[2][j+2][i] = -R[2][j-1][i];

            R[0][j+3][i] =  R[0][j-2][i];
            R[1][j+3][i] =  R[1][j-2][i];
            R[2][j+3][i] = -R[2][j-2][i];
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);


    //Create our "steady state" reconstruction variables (u, v, K, L)
    for (int j=ty; j<block_height+4; j+=get_local_size(1)) {
        const int l = j + 1; //Skip one "ghost cell row" of Q, going from 6x6 to 4x4 "halo"
        for (int i=tx; i<block_width+4; i+=get_local_size(0)) {
            const int k = i + 1;

            const float h = R[0][l][k];
            const float u = R[1][l][k] / h;
            const float v = R[2][l][k] / h;

            const float B = 0.0f;
            // (1, 2, 1)/4 weighted average of u along y and of v along x,
            // scaled by f/g. All literals are single precision so that the
            // expression is not promoted to double.
            const float U = 0.25f * f_/g_ * (1.0f*R[1][l+1][k]/R[0][l+1][k] + 2.0f*u + 1.0f*R[1][l-1][k]/R[0][l-1][k]);
            const float V = 0.25f * f_/g_ * (1.0f*R[2][l][k+1]/R[0][l][k+1] + 2.0f*v + 1.0f*R[2][l][k-1]/R[0][l][k-1]);
            //const float U = f_/g_ * u;
            //const float V = f_/g_ * v;
            const float K = h + B - V;
            const float L = h + B + U;

            Q[0][j][i] = u;
            Q[1][j][i] = v;
            Q[2][j][i] = K;
            Q[3][j][i] = L;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);


    //Reconstruct slopes along x axis
    for (int j=ty; j<block_height; j+=get_local_size(1)) {
        const int l = j + 2; //Skip ghost cells
        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            const int k = i + 1;
            for (int p=0; p<4; ++p) {
                Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_);
            }
        }
    }

    //Reconstruct slopes along y axis
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        const int l = j + 1;
        for (int i=tx; i<block_width; i+=get_local_size(0)) {
            const int k = i + 2; //Skip ghost cells
            for (int p=0; p<4; ++p) {
                Qy[p][j][i] = minmodSlope(Q[p][l-1][k], Q[p][l][k], Q[p][l+1][k], theta_);
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);


    //Compute fluxes along the x axis
    for (int j=ty; j<block_height; j+=get_local_size(1)) {
        const int l = j + 2; //Skip ghost cells (be consistent with reconstruction offsets)
        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const int k = i + 1;

            // R=(u, v, K, L) reconstructed at a cell interface from the right (p) and left (m)
            const float4 Rp = (float4)(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
                                       Q[1][l][k+1] - 0.5f*Qx[1][j][i+1],
                                       Q[2][l][k+1] - 0.5f*Qx[2][j][i+1],
                                       Q[3][l][k+1] - 0.5f*Qx[3][j][i+1]);
            const float4 Rm = (float4)(Q[0][l][k  ] + 0.5f*Qx[0][j][i  ],
                                       Q[1][l][k  ] + 0.5f*Qx[1][j][i  ],
                                       Q[2][l][k  ] + 0.5f*Qx[2][j][i  ],
                                       Q[3][l][k  ] + 0.5f*Qx[3][j][i  ]);

            // Variables to reconstruct h from u, v, K, L
            const float vp = Q[1][l][k+1];
            const float vm = Q[1][l][k  ];
            const float V = 0.5f * f_/g_ * (vp + vm);
            const float B = 0.0f;

            // Reconstruct h = K + V - B (K is stored without the factor g)
            const float hp = Rp.z + V - B;
            const float hm = Rm.z + V - B;

            // Our flux variables Q=(h, u, v)
            const float3 Qp = (float3)(hp, Rp.x, Rp.y);
            const float3 Qm = (float3)(hm, Rm.x, Rm.y);

            // Computed flux
            const float3 flux = CDKLM16_flux(Qm, Qp, g_);
            F[0][j][i] = flux.x;
            F[1][j][i] = flux.y;
            F[2][j][i] = flux.z;
        }
    }

    //Compute fluxes along the y axis
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const int l = j + 1;
        for (int i=tx; i<block_width; i+=get_local_size(0)) {
            const int k = i + 2; //Skip ghost cells
            // Q at interface from the right and left
            const float4 Rp = (float4)(Q[0][l+1][k] - 0.5f*Qy[0][j+1][i],
                                       Q[1][l+1][k] - 0.5f*Qy[1][j+1][i],
                                       Q[2][l+1][k] - 0.5f*Qy[2][j+1][i],
                                       Q[3][l+1][k] - 0.5f*Qy[3][j+1][i]);
            const float4 Rm = (float4)(Q[0][l  ][k] + 0.5f*Qy[0][j  ][i],
                                       Q[1][l  ][k] + 0.5f*Qy[1][j  ][i],
                                       Q[2][l  ][k] + 0.5f*Qy[2][j  ][i],
                                       Q[3][l  ][k] + 0.5f*Qy[3][j  ][i]);

            // Variables to reconstruct h from u, v, K, L
            const float up = Q[0][l+1][k];
            const float um = Q[0][l  ][k];
            const float U = 0.5f * f_/g_ * (up + um);
            const float B = 0.0f;

            // Reconstruct h = L - U - B (L is stored without the factor g)
            const float hp = Rp.w - U - B;
            const float hm = Rm.w - U - B;

            // Our flux variables Q=(h, v, u)
            // Note that we swap u and v
            const float3 Qp = (float3)(hp, Rp.y, Rp.x);
            const float3 Qm = (float3)(hm, Rm.y, Rm.x);

            // Computed flux
            // Note that we swap back u and v
            const float3 flux = CDKLM16_flux(Qm, Qp, g_);
            G[0][j][i] = flux.x;
            G[1][j][i] = flux.z;
            G[2][j][i] = flux.y;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);


    //Sum fluxes and advance in time for all internal cells
    if (ti > 2 && ti < nx_+3 && tj > 2 && tj < ny_+3) {
        const int i = tx + 3; //Skip local ghost cells, i.e., +3
        const int j = ty + 3;

        const float X = windStressX(
            wind_stress_type_,
            dx_, dy_, dt_,
            tau0_, rho_, alpha_, xm_, Rc_,
            x0_, y0_,
            u0_, v0_,
            t_);
        const float Y = windStressY(
            wind_stress_type_,
            dx_, dy_, dt_,
            tau0_, rho_, alpha_, xm_, Rc_,
            x0_, y0_,
            u0_, v0_,
            t_);

        // NOTE(review): the Coriolis source is applied as -f*hv to hu and
        // +f*hu to hv, whereas K = h + B - V and L = h + B + U above look
        // like they are balanced by +f*hv and -f*hu. Please confirm the sign
        // convention; the behavior is left unchanged here.
        const float h1  = R[0][j][i] + (F[0][ty][tx] - F[0][ty  ][tx+1]) * dt_ / dx_
                                     + (G[0][ty][tx] - G[0][ty+1][tx  ]) * dt_ / dy_;
        const float hu1 = R[1][j][i] + (F[1][ty][tx] - F[1][ty  ][tx+1]) * dt_ / dx_
                                     + (G[1][ty][tx] - G[1][ty+1][tx  ]) * dt_ / dy_
                                     + dt_*X - dt_*f_*R[2][j][i];
        const float hv1 = R[2][j][i] + (F[2][ty][tx] - F[2][ty  ][tx+1]) * dt_ / dx_
                                     + (G[2][ty][tx] - G[2][ty+1][tx  ]) * dt_ / dy_
                                     + dt_*Y + dt_*f_*R[1][j][i];

        __global float* const h_row  = (__global float*) ((__global char*) h1_ptr_ + h1_pitch_*tj);
        __global float* const hu_row = (__global float*) ((__global char*) hu1_ptr_ + hu1_pitch_*tj);
        __global float* const hv_row = (__global float*) ((__global char*) hv1_ptr_ + hv1_pitch_*tj);

        // Bottom friction factor, applied by dividing the momentum by (1 + C)
        const float C = 2.0f*r_*dt_/R[0][j][i];

        if (step_ == 0) {
            //First step of RK2 ODE integrator

            h_row[ti] = h1;
            hu_row[ti] = hu1 / (1.0f + C);
            hv_row[ti] = hv1 / (1.0f + C);
        }
        else if (step_ == 1) {
            //Second step of RK2 ODE integrator

            //First read the values currently stored in the output buffers
            //(Q^n, when the host has swapped input and output for this stage)
            const float h_a  = h_row[ti];
            const float hu_a = hu_row[ti];
            const float hv_a = hv_row[ti];

            //Compute Q^n+1
            const float h_b  = 0.5f*(h_a + h1);
            const float hu_b = 0.5f*(hu_a + hu1);
            const float hv_b = 0.5f*(hv_a + hv1);

            //Write to main memory
            h_row[ti] = h_b;
            hu_row[ti] = hu_b / (1.0f + 0.5f*C);
            hv_row[ti] = hv_b / (1.0f + 0.5f*C);
        }
    }

}
|
||||
193
SWESimulators/CTCS.py
Normal file
193
SWESimulators/CTCS.py
Normal file
@@ -0,0 +1,193 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the Centered in Time, Centered in Space
|
||||
(leapfrog) numerical scheme for the shallow water equations,
|
||||
described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the Centered in time centered in space scheme
|
||||
"""
|
||||
class CTCS:
    """
    Shallow water simulator that drives the three Centered in Time,
    Centered in Space kernels (eta, U and V) once per timestep.
    """

    def __init__(self,
                 cl_ctx,
                 H, eta0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r, A,
                 wind_stress=None,
                 block_width=16, block_height=16):
        """
        Initialization routine
        cl_ctx: OpenCL context used for the command queue, kernels and buffers
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1)
        r: Bottom friction coefficient (2.4e-3 m/s)
        A: Eddy viscosity coefficient (O(dx))
        wind_stress: Wind stress parameters. None (the default) creates a
            fresh Common.WindStressParams() for this simulator
        block_width: OpenCL work-group size along x
        block_height: OpenCL work-group size along y
        """
        self.cl_ctx = cl_ctx

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #NOTE(review): reload is a Python 2 builtin and re-executes the Common
        #module on every construction; this looks like a development aid.
        reload(Common)
        #Get kernels
        self.u_kernel = Common.get_kernel(self.cl_ctx, "CTCS_U_kernel.opencl", block_width, block_height)
        self.v_kernel = Common.get_kernel(self.cl_ctx, "CTCS_V_kernel.opencl", block_width, block_height)
        self.eta_kernel = Common.get_kernel(self.cl_ctx, "CTCS_eta_kernel.opencl", block_width, block_height)

        #Create data by uploading to device
        ghost_cells_x = 1
        ghost_cells_y = 1
        self.H = Common.OpenCLArray2D(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, H)
        self.cl_data = Common.SWEDataArkawaC(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0)

        #A default written in the signature would be created once, when the
        #class is defined, and then shared by every simulator instance.
        if wind_stress is None:
            wind_stress = Common.WindStressParams()

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.f = np.float32(f)
        self.r = np.float32(r)
        self.A = np.float32(A)
        self.wind_stress = wind_stress

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        #(global size = domain size rounded up to a whole number of blocks)
        self.local_size = (block_width, block_height)
        self.global_size = (
            int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]),
            int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1])
        )

    def step(self, t_end=0.0):
        """
        Function which steps the simulation t_end time units forward, using
        timesteps of size self.dt and a shorter last timestep when t_end is
        not a multiple of self.dt. Returns the simulation time afterwards.
        """
        n = int(t_end / self.dt + 1)

        for i in range(0, n):
            #Notation:
            # cl_data.u0 => U^{n-1} before U kernel, U^{n+1} after U kernel
            # cl_data.u1 => U^{n}
            # When we call cl_data.swap(), we swap these, so that
            # cl_data.u0 => U^{n}
            # cl_data.u1 => U^{n+1} (U kernel has been executed)
            # Now we are ready for the next time step

            local_dt = np.float32(min(self.dt, t_end-i*self.dt))

            if (local_dt <= 0.0):
                break

            self.eta_kernel.computeEtaKernel(self.cl_queue, self.global_size, self.local_size,
                    self.nx, self.ny,
                    self.dx, self.dy, local_dt,
                    self.g, self.f, self.r,
                    self.cl_data.h0.data, self.cl_data.h0.pitch,     # eta^{n-1} => eta^{n+1}
                    self.cl_data.hu1.data, self.cl_data.hu1.pitch,   # U^{n}
                    self.cl_data.hv1.data, self.cl_data.hv1.pitch)   # V^{n}

            self.u_kernel.computeUKernel(self.cl_queue, self.global_size, self.local_size,
                    self.nx, self.ny,
                    self.dx, self.dy, local_dt,
                    self.g, self.f, self.r, self.A,
                    self.H.data, self.H.pitch,
                    self.cl_data.h1.data, self.cl_data.h1.pitch,     # eta^{n}
                    self.cl_data.hu0.data, self.cl_data.hu0.pitch,   # U^{n-1} => U^{n+1}
                    self.cl_data.hu1.data, self.cl_data.hu1.pitch,   # U^{n}
                    self.cl_data.hv1.data, self.cl_data.hv1.pitch,   # V^{n}
                    self.wind_stress.type,
                    self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc,
                    self.wind_stress.x0, self.wind_stress.y0,
                    self.wind_stress.u0, self.wind_stress.v0,
                    self.t)

            self.v_kernel.computeVKernel(self.cl_queue, self.global_size, self.local_size,
                    self.nx, self.ny,
                    self.dx, self.dy, local_dt,
                    self.g, self.f, self.r, self.A,
                    self.H.data, self.H.pitch,
                    self.cl_data.h1.data, self.cl_data.h1.pitch,     # eta^{n}
                    self.cl_data.hu1.data, self.cl_data.hu1.pitch,   # U^{n}
                    self.cl_data.hv0.data, self.cl_data.hv0.pitch,   # V^{n-1} => V^{n+1}
                    self.cl_data.hv1.data, self.cl_data.hv1.pitch,   # V^{n}
                    self.wind_stress.type,
                    self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc,
                    self.wind_stress.x0, self.wind_stress.y0,
                    self.wind_stress.u0, self.wind_stress.v0,
                    self.t)

            #After the kernels, swap the data pointers
            self.cl_data.swap()

            self.t += local_dt

        return self.t

    def download(self):
        """
        Returns whatever self.cl_data.download produces for our command queue
        (presumably the simulation state copied back to the host).
        """
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
435
SWESimulators/CTCS2Layer.py
Normal file
435
SWESimulators/CTCS2Layer.py
Normal file
@@ -0,0 +1,435 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the Centered in Time, Centered in Space
|
||||
(leapfrog) numerical scheme for the shallow water equations,
|
||||
described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
|
||||
#Import packages we need
|
||||
import os
|
||||
import time
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that holds data for the SW equations in OpenCL
|
||||
"""
|
||||
class CTCS2LayerDataCL:
    """
    Device-side storage for the two-layer shallow water simulator.

    For each layer k in {1, 2} this object owns one OpenCL buffer for the
    water depth (hk_0) and two buffers each for eta, u and v (suffixes _0
    and _1). Both buffers of a pair are initialised from the same host
    array, and swap() exchanges them after a timestep. The row pitch in
    bytes of every buffer is stored as <buffer name>_pitch (np.int32).
    """

    @staticmethod
    def _as_float32_c(data, label):
        """
        Returns data as a C-contiguous single precision array.

        The input is returned untouched when it already is float32 and
        C-contiguous. Otherwise a converted copy is returned and a
        "Converting <label>" message is printed. Arrays that are neither
        C- nor Fortran-contiguous (e.g. strided views) are converted too,
        so that the host buffer handed to OpenCL is always densely packed.
        """
        if data.dtype != np.float32 or not data.flags.c_contiguous:
            print("Converting " + label)
            data = data.astype(np.float32, order='C')
        return data

    def __init__(self, cl_ctx, h1_0, eta1_0, u1_0, v1_0,
                 h2_0, eta2_0, u2_0, v2_0):
        """
        Uploads initial data to the CL device.

        cl_ctx: OpenCL context in which the buffers are created
        h1_0, eta1_0: Layer 1 depth and surface deviation, shape (ny+2, nx+2)
        u1_0: Layer 1 momentum along x, shape (ny+2, nx+1)
        v1_0: Layer 1 momentum along y, shape (ny+1, nx+2)
        h2_0, eta2_0, u2_0, v2_0: Same quantities and shapes for layer 2

        nx and ny are derived from the shape of h1_0 (two ghost cells are
        subtracted in each direction); all other arrays are checked
        against them.
        """
        #Make sure that the data is single precision floating point
        convert = self._as_float32_c
        h1_0 = convert(h1_0, "H_0")
        eta1_0 = convert(eta1_0, "Eta_0")
        u1_0 = convert(u1_0, "U_0")
        v1_0 = convert(v1_0, "V_0")

        #Same for second (deepest) layer
        h2_0 = convert(h2_0, "H2_0")
        eta2_0 = convert(eta2_0, "Eta2_0")
        u2_0 = convert(u2_0, "U2_0")
        v2_0 = convert(v2_0, "V2_0")

        #Domain size without ghost cells
        self.ny, self.nx = h1_0.shape
        self.nx = self.nx - 2
        self.ny = self.ny - 2

        #Check that all arrays agree with the staggered grid layout
        cell_shape = (self.ny+2, self.nx+2)
        u_shape = (self.ny+2, self.nx+1)
        v_shape = (self.ny+1, self.nx+2)
        expected = (
            ("h1_0", h1_0, cell_shape),
            ("eta1_0", eta1_0, cell_shape),
            ("u1_0", u1_0, u_shape),
            ("v1_0", v1_0, v_shape),
            ("h2_0", h2_0, cell_shape),
            ("eta2_0", eta2_0, cell_shape),
            ("u2_0", u2_0, u_shape),
            ("v2_0", v2_0, v_shape),
        )
        for name, array, shape in expected:
            assert array.shape == shape, \
                "%s has shape %s, expected %s" % (name, array.shape, shape)

        #Upload data to the device
        mf = cl.mem_flags
        flags = mf.READ_WRITE | mf.COPY_HOST_PTR

        def upload(array):
            return cl.Buffer(cl_ctx, flags, hostbuf=array)

        def pitch(array):
            #Bytes per row of a densely packed C-ordered array
            return np.int32(array.shape[1] * array.itemsize)

        self.h1_0 = upload(h1_0)

        self.eta1_0 = upload(eta1_0)
        self.eta1_1 = upload(eta1_0)

        self.u1_0 = upload(u1_0)
        self.u1_1 = upload(u1_0)

        self.v1_0 = upload(v1_0)
        self.v1_1 = upload(v1_0)

        #Same for layer 2
        self.h2_0 = upload(h2_0)

        self.eta2_0 = upload(eta2_0)
        self.eta2_1 = upload(eta2_0)

        self.u2_0 = upload(u2_0)
        self.u2_1 = upload(u2_0)

        self.v2_0 = upload(v2_0)
        self.v2_1 = upload(v2_0)

        #Compute pitches
        self.h1_0_pitch = pitch(h1_0)

        self.eta1_0_pitch = pitch(eta1_0)
        self.eta1_1_pitch = pitch(eta1_0)

        self.u1_0_pitch = pitch(u1_0)
        self.u1_1_pitch = pitch(u1_0)

        self.v1_0_pitch = pitch(v1_0)
        self.v1_1_pitch = pitch(v1_0)

        #Same for layer 2
        self.h2_0_pitch = pitch(h2_0)

        self.eta2_0_pitch = pitch(eta2_0)
        self.eta2_1_pitch = pitch(eta2_0)

        self.u2_0_pitch = pitch(u2_0)
        self.u2_1_pitch = pitch(u2_0)

        self.v2_0_pitch = pitch(v2_0)
        self.v2_1_pitch = pitch(v2_0)

    def swap(self):
        """
        Swaps the _0 and _1 buffers of eta, u and v for both layers.

        Called after a timestep has been completed. The depth buffers
        (h1_0, h2_0) have no counterpart and are left untouched, as are
        the pitches, which are identical within each pair.
        """
        self.eta1_1, self.eta1_0 = self.eta1_0, self.eta1_1
        self.u1_1, self.u1_0 = self.u1_0, self.u1_1
        self.v1_1, self.v1_0 = self.v1_0, self.v1_1

        #Same for layer 2
        self.eta2_1, self.eta2_0 = self.eta2_0, self.eta2_1
        self.u2_1, self.u2_0 = self.u2_0, self.u2_1
        self.v2_1, self.v2_0 = self.v2_0, self.v2_1

    def download(self, cl_queue):
        """
        Copies the _1 buffers from the CL device into new host arrays.

        cl_queue: OpenCL command queue used for the copies

        Returns the tuple (eta1_1, u1_1, v1_1, eta2_1, u2_1, v2_1) of
        float32 arrays, all including ghost cells.
        """
        cell_shape = (self.ny+2, self.nx+2)
        u_shape = (self.ny+2, self.nx+1)
        v_shape = (self.ny+1, self.nx+2)

        def fetch(device_buffer, shape):
            #Allocate data on the host and copy from device to host
            host = np.empty(shape, dtype=np.float32, order='C')
            cl.enqueue_copy(cl_queue, host, device_buffer)
            return host

        eta1_1 = fetch(self.eta1_1, cell_shape)
        u1_1 = fetch(self.u1_1, u_shape)
        v1_1 = fetch(self.v1_1, v_shape)

        #Same for layer 2
        eta2_1 = fetch(self.eta2_1, cell_shape)
        u2_1 = fetch(self.u2_1, u_shape)
        v2_1 = fetch(self.v2_1, v_shape)

        return eta1_1, u1_1, v1_1, eta2_1, u2_1, v2_1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the two-layer SW equations using the centered in time, centered in space (CTCS) scheme
|
||||
"""
|
||||
class CTCS2Layer:
    """
    Two-layer shallow water simulator using the centered in time, centered
    in space scheme, executed through OpenCL kernels.
    """

    def __init__(self,
                 h1_0, eta1_0, u1_0, v1_0,
                 h2_0, eta2_0, u2_0, v2_0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r1, r2, A,
                 rho1, rho2,
                 wind_type=99,  # "no wind"
                 wind_tau0=0, wind_alpha=0, wind_xm=0, wind_Rc=0,
                 wind_x0=0, wind_y0=0,
                 wind_u0=0, wind_v0=0):
        """
        Initialization routine
        h1_0: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta1_0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        u1_0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        v1_0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        h2_0: Water depth (layer 2) incl ghost cells, (nx+2)*(ny+2) cells
        eta2_0: Initial deviation from mean sea level (layer 2) incl ghost cells, (nx+2)*(ny+2) cells
        u2_0: Initial momentum (layer 2) along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        v2_0: Initial momentum (layer 2) along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1)
        r1: Inter-layer friction coefficient (m/s)
        r2: Bottom friction coefficient (2.4e-3 m/s)
        A: Eddy viscosity coefficient (O(dx))
        rho1: Density of upper layer (kg / m^3), must not exceed rho2
        rho2: Density of lower layer (kg / m^3)
        wind_type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone;
                   any other value (such as the default 99) gives no wind stress
        wind_tau0: Amplitude of wind stress (Pa)
        wind_alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
        wind_xm: Maximum wind stress for bell shaped wind stress
        wind_Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
        wind_x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
        wind_y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
        wind_u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
        wind_v0: Translation speed along y for moving cyclone (-0.5*u0)
        """
        #Make sure we get compiler output from OpenCL
        os.environ["PYOPENCL_COMPILER_OUTPUT"] = "1"

        #Set which CL device to use
        #NOTE: this overrides any PYOPENCL_CTX already set by the user
        os.environ["PYOPENCL_CTX"] = "1"

        #Create OpenCL context
        self.cl_ctx = cl.create_some_context()
        #Single-argument call form prints the same text under Python 2 and 3
        print("Using  " + str(self.cl_ctx.devices[0].name))

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.u_kernel = self.get_kernel("CTCS2Layer_U_kernel.opencl")
        self.v_kernel = self.get_kernel("CTCS2Layer_V_kernel.opencl")
        self.eta_kernel = self.get_kernel("CTCS2Layer_eta_kernel.opencl")

        #Create data by uploading to device
        self.cl_data = CTCS2LayerDataCL(self.cl_ctx, h1_0, eta1_0, u1_0, v1_0, h2_0, eta2_0, u2_0, v2_0)

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.f = np.float32(f)
        self.r1 = np.float32(r1)
        self.r2 = np.float32(r2)
        self.A = np.float32(A)
        assert(rho1 <= rho2)
        self.rho1 = np.float32(rho1)
        self.rho2 = np.float32(rho2)
        self.wind_type = np.int32(wind_type)
        self.wind_tau0 = np.float32(wind_tau0)
        self.wind_alpha = np.float32(wind_alpha)
        self.wind_xm = np.float32(wind_xm)
        self.wind_Rc = np.float32(wind_Rc)
        self.wind_x0 = np.float32(wind_x0)
        self.wind_y0 = np.float32(wind_y0)
        self.wind_u0 = np.float32(wind_u0)
        self.wind_v0 = np.float32(wind_v0)

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        self.local_size = (8, 8) # WARNING::: MUST MATCH defines of block_width/height in kernels!
        #Round the domain size up to a whole number of work-groups
        self.global_size = (
            int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]),
            int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1])
        )

    def step(self, t_end=0.0):
        """
        Advances the simulation by t_end seconds and returns the new
        simulation time.

        t_end is a duration relative to the current time, not an absolute
        time. Full timesteps of size dt are taken, and the last one is
        shortened so that exactly t_end is covered. Each timestep runs
        the eta, U and V kernels and then swaps the device buffers.
        Nothing is executed when t_end <= 0.
        """
        n = int(t_end / self.dt + 1)

        for i in range(0, n):
            #Notation:
            # cl_data.u0 => U^{n-1} before U kernel, U^{n+1} after U kernel
            # cl_data.u1 => U^{n}
            # When we call cl_data.swap(), we swap these, so that
            # cl_data.u0 => U^{n}
            # cl_data.u1 => U^{n+1} (U kernel has been executed)
            # Now we are ready for the next time step

            local_dt = np.float32(min(self.dt, t_end-i*self.dt))

            if (local_dt <= 0.0):
                break

            self.eta_kernel.computeEtaKernel(self.cl_queue, self.global_size, self.local_size,
                    self.nx, self.ny,
                    self.dx, self.dy, local_dt,

                    self.cl_data.eta1_0, self.cl_data.eta1_0_pitch,    # eta^{n-1} => eta^{n+1}
                    self.cl_data.u1_1, self.cl_data.u1_1_pitch,        # U^{n}
                    self.cl_data.v1_1, self.cl_data.v1_1_pitch,        # V^{n}

                    self.cl_data.eta2_0, self.cl_data.eta2_0_pitch,
                    self.cl_data.u2_1, self.cl_data.u2_1_pitch,
                    self.cl_data.v2_1, self.cl_data.v2_1_pitch)

            self.u_kernel.computeUKernel(self.cl_queue, self.global_size, self.local_size,
                    self.nx, self.ny,
                    self.dx, self.dy, local_dt,
                    self.g, self.f,
                    self.r1, self.r2,
                    self.A,
                    self.rho1, self.rho2,

                    self.cl_data.h1_0, self.cl_data.h1_0_pitch,
                    self.cl_data.eta1_1, self.cl_data.eta1_1_pitch,    # eta^{n}
                    self.cl_data.u1_0, self.cl_data.u1_0_pitch,        # U^{n-1} => U^{n+1}
                    self.cl_data.u1_1, self.cl_data.u1_1_pitch,        # U^{n}
                    self.cl_data.v1_1, self.cl_data.v1_1_pitch,        # V^{n}

                    self.cl_data.h2_0, self.cl_data.h2_0_pitch,
                    self.cl_data.eta2_1, self.cl_data.eta2_1_pitch,
                    self.cl_data.u2_0, self.cl_data.u2_0_pitch,
                    self.cl_data.u2_1, self.cl_data.u2_1_pitch,
                    self.cl_data.v2_1, self.cl_data.v2_1_pitch,

                    self.wind_type,
                    self.wind_tau0, self.wind_alpha, self.wind_xm, self.wind_Rc,
                    self.wind_x0, self.wind_y0,
                    self.wind_u0, self.wind_v0,
                    self.t)

            self.v_kernel.computeVKernel(self.cl_queue, self.global_size, self.local_size,
                    self.nx, self.ny,
                    self.dx, self.dy, local_dt,
                    self.g, self.f,
                    self.r1, self.r2,
                    self.A,
                    self.rho1, self.rho2,

                    self.cl_data.h1_0, self.cl_data.h1_0_pitch,
                    self.cl_data.eta1_1, self.cl_data.eta1_1_pitch,    # eta^{n}
                    self.cl_data.u1_1, self.cl_data.u1_1_pitch,        # U^{n}
                    self.cl_data.v1_0, self.cl_data.v1_0_pitch,        # V^{n-1} => V^{n+1}
                    self.cl_data.v1_1, self.cl_data.v1_1_pitch,        # V^{n}

                    self.cl_data.h2_0, self.cl_data.h2_0_pitch,
                    self.cl_data.eta2_1, self.cl_data.eta2_1_pitch,
                    self.cl_data.u2_1, self.cl_data.u2_1_pitch,
                    self.cl_data.v2_0, self.cl_data.v2_0_pitch,
                    self.cl_data.v2_1, self.cl_data.v2_1_pitch,

                    self.wind_type,
                    self.wind_tau0, self.wind_alpha, self.wind_xm, self.wind_Rc,
                    self.wind_x0, self.wind_y0,
                    self.wind_u0, self.wind_v0,
                    self.t)

            #After the kernels, swap the data pointers
            self.cl_data.swap()

            self.t += local_dt

        return self.t

    def get_kernel(self, kernel_filename):
        """
        Reads an OpenCL source file and builds it for this object's context.

        kernel_filename: File name relative to the directory of this module

        Returns the built program.
        """
        #Read the proper program
        module_path = os.path.dirname(os.path.realpath(__file__))
        fullpath = os.path.join(module_path, kernel_filename)
        with open(fullpath, "r") as kernel_file:
            kernel_string = kernel_file.read()
            kernel = cl.Program(self.cl_ctx, kernel_string).build()

        return kernel

    def download(self):
        """
        Downloads the current state from the CL device.

        Returns what cl_data.download returns for this object's command
        queue.
        """
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
414
SWESimulators/CTCS2Layer_U_kernel.opencl
Normal file
414
SWESimulators/CTCS2Layer_U_kernel.opencl
Normal file
@@ -0,0 +1,414 @@
|
||||
/**
|
||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
||||
described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#define block_height 8
|
||||
#define block_width 8
|
||||
|
||||
|
||||
typedef __local float eta_shmem[block_height+2][block_width+1];
|
||||
typedef __local float u_shmem[block_height+2][block_width+2];
|
||||
typedef __local float v_shmem[block_height+1][block_width+1];
|
||||
|
||||
|
||||
|
||||
/**
  * Returns the x-component of the wind stress term for the calling
  * work-item, located through get_global_id().
  *
  * wind_stress_type_ selects the forcing:
  *   0: uniform along shore
  *   1: bell shaped along shore, only while t_ <= 48 hours
  *   2: moving cyclone, evaluated at time t_ + dt_
  * Any other type gives 0.
  */
float windStressX(int wind_stress_type_,
        float dx_, float dy_, float dt_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //UNIFORM_ALONGSHORE
    if (wind_stress_type_ == 0) {
        const float y = (get_global_id(1)+0.5f)*dy_;
        return tau0_/rho_ * exp(-alpha_*y);
    }

    //BELL_SHAPED_ALONGSHORE
    if (wind_stress_type_ == 1) {
        //No forcing once the 48 hour window has passed
        if (!(t_ <= 48.0f*3600.0f)) {
            return 0.0f;
        }
        const float a = alpha_*((get_global_id(0)+0.5f)*dx_-xm_);
        const float aa = a*a;
        const float y = (get_global_id(1)+0.5f)*dy_;
        return tau0_/rho_ * exp(-aa) * exp(-alpha_*y);
    }

    //MOVING_CYCLONE
    if (wind_stress_type_ == 2) {
        const float x = (get_global_id(0))*dx_;
        const float y = (get_global_id(1)+0.5f)*dy_;

        //Offset from the cyclone center at time t_ + dt_
        const float a = (x-x0_-u0_*(t_+dt_));
        const float aa = a*a;
        const float b = (y-y0_-v0_*(t_+dt_));
        const float bb = b*b;

        const float r = sqrt(aa+bb);
        const float c = 1.0f - r/Rc_;
        const float xi = c*c;

        return -(tau0_/rho_) * (b/Rc_) * exp(-0.5f*xi);
    }

    //Unknown type: no wind stress
    return 0.0f;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Kernel that evolves U one step in time for both layers.
  *
  * Each work-group stages H, eta^n, U^n and V^n of both layers into local
  * memory, after which every work-item evaluates one U point. The result
  * U^{n+1} overwrites U^{n-1} in U1_0_ptr_ / U2_0_ptr_, and is only
  * written for 0 < i < nx_ and 0 < j < ny_+1.
  *
  * All *_pitch_ arguments are row pitches in bytes.
  */
__kernel void computeUKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r1_, //< Inter-layer friction coefficient
        float r2_, //< Bottom friction coefficient

        //Numerical diffusion
        float A_,

        //Density of each layer
        float rho1_,
        float rho2_,

        //Data for layer 1
        __global float* H1_ptr_, int H1_pitch_,
        __global float* eta1_1_ptr_, int eta1_1_pitch_, // eta^n
        __global float* U1_0_ptr_, int U1_0_pitch_, // U^n-1, also output, U^n+1
        __global float* U1_1_ptr_, int U1_1_pitch_, // U^n
        __global float* V1_1_ptr_, int V1_1_pitch_, // V^n

        //Data for layer 2
        __global float* H2_ptr_, int H2_pitch_,
        __global float* eta2_1_ptr_, int eta2_1_pitch_, // eta^n
        __global float* U2_0_ptr_, int U2_0_pitch_, // U^n-1, also output, U^n+1
        __global float* U2_1_ptr_, int U2_1_pitch_, // U^n
        __global float* V2_1_ptr_, int V2_1_pitch_, // V^n

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Local memory tiles, one set per layer
    eta_shmem H1_tile;
    eta_shmem eta1_tile;
    u_shmem U1_tile;
    v_shmem V1_tile;

    eta_shmem H2_tile;
    eta_shmem eta2_tile;
    u_shmem U2_tile;
    v_shmem V2_tile;

    //Position of this work-item within its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //First cell of the work-group within the domain (+1 skips the global ghost cells)
    const int bx = get_local_size(0) * get_group_id(0) + 1;
    const int by = get_local_size(1) * get_group_id(1) + 1;

    //Cell handled by this work-item
    const int ti = bx + tx;
    const int tj = by + ty;

    //Only these work-items read U^{n-1} and write U^{n+1}
    const bool is_internal = (ti > 0 && ti < nx_ && tj > 0 && tj < ny_+1);

    //Rows of the output arrays (which initially hold U^{n-1})
    __global float* const U1_0_row = (__global float*) ((__global char*) U1_0_ptr_ + U1_0_pitch_*tj);
    __global float* const U2_0_row = (__global float*) ((__global char*) U2_0_ptr_ + U2_0_pitch_*tj);

    //Read current U; work-items outside the internal range use zero
    float U1_0 = 0.0f;
    float U2_0 = 0.0f;
    if (is_internal) {
        U1_0 = U1_0_row[ti];
        U2_0 = U2_0_row[ti];
    }

    //Stage H, eta and U: all three tiles span block_height+2 rows and
    //use the same clamped global row, so they share the outer loop
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // "fake" global ghost cells by clamping
        const int row = clamp(by + j - 1, 1, ny_);

        __global float* const H1_row = (__global float*) ((__global char*) H1_ptr_ + H1_pitch_*row);
        __global float* const H2_row = (__global float*) ((__global char*) H2_ptr_ + H2_pitch_*row);

        __global float* const eta1_1_row = (__global float*) ((__global char*) eta1_1_ptr_ + eta1_1_pitch_*row);
        __global float* const eta2_1_row = (__global float*) ((__global char*) eta2_1_ptr_ + eta2_1_pitch_*row);

        __global float* const U1_1_row = (__global float*) ((__global char*) U1_1_ptr_ + U1_1_pitch_*row);
        __global float* const U2_1_row = (__global float*) ((__global char*) U2_1_ptr_ + U2_1_pitch_*row);

        //H and eta: block_width+1 columns
        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // "fake" global ghost cells by clamping
            const int col = clamp(bx + i, 1, nx_);

            H1_tile[j][i] = H1_row[col];
            H2_tile[j][i] = H2_row[col];

            eta1_tile[j][i] = eta1_1_row[col];
            eta2_tile[j][i] = eta2_1_row[col];
        }

        //U: block_width+2 columns
        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            // Prevent out-of-bounds
            const int col = clamp(bx + i - 1, 0, nx_);

            U1_tile[j][i] = U1_1_row[col];
            U2_tile[j][i] = U2_1_row[col];
        }
    }

    //Stage V: block_height+1 rows, block_width+1 columns
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        // Prevent out-of-bounds
        const int row = clamp(by + j - 1, 0, ny_);

        __global float* const V1_1_row = (__global float*) ((__global char*) V1_1_ptr_ + V1_1_pitch_*row);
        __global float* const V2_1_row = (__global float*) ((__global char*) V2_1_ptr_ + V2_1_pitch_*row);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // "fake" ghost cells by clamping
            const int col = clamp(bx + i, 1, nx_);

            V1_tile[j][i] = V1_1_row[col];
            V2_tile[j][i] = V2_1_row[col];
        }
    }

    //Make sure all work-items have finished writing the tiles
    barrier(CLK_LOCAL_MEM_FENCE);

    /**
      * Short-hands for the stencil values, using the notation
      * Var1_00 as var_i,j for layer 1
      * Var2_p0 as var_i+1,j for layer 2
      * Var1_0m as var_i,j-1 for layer 1
      * etc
      */
    //Layer 1
    const float U1_00 = U1_tile[ty+1][tx+1]; //U at "center"
    const float U1_0p = U1_tile[ty+2][tx+1]; //U at "north"
    const float U1_0m = U1_tile[ty  ][tx+1]; //U at "south"
    const float U1_p0 = U1_tile[ty+1][tx+2]; //U at "east"
    const float U1_m0 = U1_tile[ty+1][tx  ]; //U at "west"

    const float V1_00 = V1_tile[ty+1][tx  ];
    const float V1_p0 = V1_tile[ty+1][tx+1];
    const float V1_0m = V1_tile[ty  ][tx  ];
    const float V1_pm = V1_tile[ty  ][tx+1];

    const float H1_0m = H1_tile[ty  ][tx  ];
    const float H1_00 = H1_tile[ty+1][tx  ];
    const float H1_0p = H1_tile[ty+2][tx  ];
    const float H1_pm = H1_tile[ty  ][tx+1];
    const float H1_p0 = H1_tile[ty+1][tx+1];
    const float H1_pp = H1_tile[ty+2][tx+1];

    const float eta1_0m = eta1_tile[ty  ][tx  ];
    const float eta1_00 = eta1_tile[ty+1][tx  ];
    const float eta1_0p = eta1_tile[ty+2][tx  ];
    const float eta1_pm = eta1_tile[ty  ][tx+1];
    const float eta1_p0 = eta1_tile[ty+1][tx+1];
    const float eta1_pp = eta1_tile[ty+2][tx+1];

    //Layer 2 (bottom)
    const float U2_00 = U2_tile[ty+1][tx+1];
    const float U2_0p = U2_tile[ty+2][tx+1];
    const float U2_0m = U2_tile[ty  ][tx+1];
    const float U2_p0 = U2_tile[ty+1][tx+2];
    const float U2_m0 = U2_tile[ty+1][tx  ];

    const float V2_00 = V2_tile[ty+1][tx  ];
    const float V2_p0 = V2_tile[ty+1][tx+1];
    const float V2_0m = V2_tile[ty  ][tx  ];
    const float V2_pm = V2_tile[ty  ][tx+1];

    const float H2_0m = H2_tile[ty  ][tx  ];
    const float H2_00 = H2_tile[ty+1][tx  ];
    const float H2_0p = H2_tile[ty+2][tx  ];
    const float H2_pm = H2_tile[ty  ][tx+1];
    const float H2_p0 = H2_tile[ty+1][tx+1];
    const float H2_pp = H2_tile[ty+2][tx+1];

    const float eta2_0m = eta2_tile[ty  ][tx  ];
    const float eta2_00 = eta2_tile[ty+1][tx  ];
    const float eta2_0p = eta2_tile[ty+2][tx  ];
    const float eta2_pm = eta2_tile[ty  ][tx+1];
    const float eta2_p0 = eta2_tile[ty+1][tx+1];
    const float eta2_pp = eta2_tile[ty+2][tx+1];

    //Four-point averages of eta (Eta_bar at the V position)
    const float eta1_bar_0m = 0.25f*(eta1_0m + eta1_pm + eta1_00 + eta1_p0);
    const float eta1_bar_00 = 0.25f*(eta1_00 + eta1_p0 + eta1_0p + eta1_pp);

    const float eta2_bar_0m = 0.25f*(eta2_0m + eta2_pm + eta2_00 + eta2_p0);
    const float eta2_bar_00 = 0.25f*(eta2_00 + eta2_p0 + eta2_0p + eta2_pp);

    //Four-point averages H_bar and two-point average H_x (at the U position)
    const float H1_bar_0m = 0.25f*(H1_0m + H1_pm + H1_00 + H1_p0);
    const float H1_bar_00 = 0.25f*(H1_00 + H1_p0 + H1_0p + H1_pp);
    const float H1_x = 0.5f*(H1_00 + H1_p0);

    const float H2_bar_0m = 0.25f*(H2_0m + H2_pm + H2_00 + H2_p0);
    const float H2_bar_00 = 0.25f*(H2_00 + H2_p0 + H2_0p + H2_pp);
    const float H2_x = 0.5f*(H2_00 + H2_p0);

    //Layer thickness of the top layer ...
    const float h1_p0 = H1_p0 + eta1_p0 - eta2_p0;
    const float h1_00 = H1_00 + eta1_00 - eta2_00;
    const float h1_bar_0m = H1_bar_0m + eta1_bar_0m - eta2_bar_0m;
    const float h1_bar_00 = H1_bar_00 + eta1_bar_00 - eta2_bar_00;

    // ... and of the bottom layer
    const float h2_p0 = H2_p0 + eta2_p0;
    const float h2_00 = H2_00 + eta2_00;
    const float h2_bar_0m = H2_bar_0m + eta2_bar_0m;
    const float h2_bar_00 = H2_bar_00 + eta2_bar_00;

    //Compute pressure components
    const float h1_x = 0.5f*(h1_p0 + h1_00);
    const float h2_x = 0.5f*(h2_p0 + h2_00);

    //Alternative formulation, currently disabled:
    //const float epsilon = (rho2_ - rho1_)/rho2_;
    //const float P1_x = -g_*h1_x * (eta1_p0 - eta1_00 + h2_p0 - h2_00) * (1.0f - epsilon);
    //const float P2_x = -g_*h2_x * (eta2_p0 - eta2_00 + H2_p0 - H2_00);

    const float P1_x = - g_*h1_x*(eta1_p0 - eta1_00) - 0.5f*g_*(eta1_p0*eta1_p0 - eta1_00*eta1_00);
    const float P2_x = - g_ * (rho1_/rho2_) *
            ( //Pressure contribution from top layer
                h2_x*(eta1_p0 - eta1_00) + 0.5f*(eta1_p0*eta1_p0 - eta1_00*eta1_00)
            )
            - g_ * ((rho2_ - rho1_)/rho2_) *
            ( //Pressure contribution from bottom layer
                h2_x*(eta2_p0 - eta2_00) + 0.5f*(eta2_p0*eta2_p0 - eta2_00*eta2_00)
            );

    //Four-point average of V (V at the U position)
    const float V1_bar = 0.25f*(V1_0m + V1_00 + V1_pm + V1_p0);
    const float V2_bar = 0.25f*(V2_0m + V2_00 + V2_pm + V2_p0);

    //Calculate the bottom and/or inter-layer friction coefficient
    //FIXME: Should this be h instead of H?
    const float C1 = r1_/H1_x;
    const float C2 = r2_/H2_x;

    //Calculate numerical diffusion / subgrid energy loss coefficient
    const float D = 2.0f*A_*dt_*(dx_*dx_ + dy_*dy_)/(dx_*dx_*dy_*dy_);

    //Calculate nonlinear effects
    const float N1_a = (U1_p0 + U1_00)*(U1_p0 + U1_00) / (h1_p0);
    const float N1_b = (U1_00 + U1_m0)*(U1_00 + U1_m0) / (h1_00);
    const float N1_c = (U1_0p + U1_00)*(V1_p0 + V1_00) / (h1_bar_00);
    const float N1_d = (U1_00 + U1_0m)*(V1_pm + V1_0m) / (h1_bar_0m);
    const float N1 = 0.25f*( N1_a - N1_b + (dx_/dy_)*(N1_c - N1_d) );

    const float N2_a = (U2_p0 + U2_00)*(U2_p0 + U2_00) / (h2_p0);
    const float N2_b = (U2_00 + U2_m0)*(U2_00 + U2_m0) / (h2_00);
    const float N2_c = (U2_0p + U2_00)*(V2_p0 + V2_00) / (h2_bar_00);
    const float N2_d = (U2_00 + U2_0m)*(V2_pm + V2_0m) / (h2_bar_0m);
    const float N2 = 0.25f*( N2_a - N2_b + (dx_/dy_)*(N2_c - N2_d) );

    //Calculate eddy viscosity terms
    const float E1 = (U1_p0 - U1_0 + U1_m0)/(dx_*dx_) + (U1_0p - U1_0 + U1_0m)/(dy_*dy_);
    const float E2 = (U2_p0 - U2_0 + U2_m0)/(dx_*dx_) + (U2_0p - U2_0 + U2_0m)/(dy_*dy_);

    //Calculate the wind shear stress for the top layer
    const float X = windStressX(
            wind_stress_type_,
            dx_, dy_, dt_,
            tau0_, rho1_, alpha_, xm_, Rc_,
            x0_, y0_,
            u0_, v0_,
            t_);

    //Compute U at the next timestep
    float U1_2 = (U1_0 + 2.0f*dt_*(f_*V1_bar + (N1 + P1_x)/dx_ + X + C1*U2_0 + A_*E1) ) / (1.0f + D);
    float U2_2 = (U2_0 + 2.0f*dt_*(f_*V2_bar + (N2 + P2_x)/dx_ + C1*U1_0 + A_*E2) ) / (1.0f + 2.0f*dt_*C2 + D);

    //Write to main memory for internal cells
    if (is_internal) {
        U1_0_row[ti] = U1_2;
        U2_0_row[ti] = U2_2;
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
395
SWESimulators/CTCS2Layer_V_kernel.opencl
Normal file
395
SWESimulators/CTCS2Layer_V_kernel.opencl
Normal file
@@ -0,0 +1,395 @@
|
||||
/**
|
||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
||||
described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define block_height 8
|
||||
#define block_width 8
|
||||
|
||||
typedef __local float eta_shmem[block_height+1][block_width+2];
|
||||
typedef __local float u_shmem[block_height+1][block_width+1];
|
||||
typedef __local float v_shmem[block_height+2][block_width+2];
|
||||
|
||||
|
||||
|
||||
/**
  * Returns the y-component of the wind stress term for the calling
  * work-item, located through get_global_id().
  *
  * Only wind_stress_type_ == 2 (moving cyclone, evaluated at time
  * t_ + dt_) produces a value; every other type gives 0.
  */
float windStressY(int wind_stress_type_,
        float dx_, float dy_, float dt_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Everything except MOVING_CYCLONE has no y-component
    if (wind_stress_type_ != 2) {
        return 0.0f;
    }

    const float x = (get_global_id(0)+0.5f)*dx_;
    const float y = (get_global_id(1))*dy_;

    //Offset from the cyclone center at time t_ + dt_
    const float a = (x-x0_-u0_*(t_+dt_));
    const float aa = a*a;
    const float b = (y-y0_-v0_*(t_+dt_));
    const float bb = b*b;

    const float r = sqrt(aa+bb);
    const float c = 1.0f - r/Rc_;
    const float xi = c*c;

    return (tau0_/rho_) * (a/Rc_) * exp(-0.5f*xi);
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Kernel that advances the y-momentum V of both layers of the two-layer
  * CTCS (leapfrog) scheme one step in time.
  *
  * For every cell selected by the interior test the kernel reads V^{n-1}
  * from V1_0/V2_0, evaluates the Coriolis, advection, pressure, wind,
  * inter-layer friction and eddy viscosity terms from the fields at time
  * level n, and overwrites V1_0/V2_0 with V^{n+1}.
  *
  * All 2D buffers are addressed as pitched arrays: the pitch is a byte
  * offset between consecutive rows (it is added to a char pointer).
  */

// Pointer to the first element of row `row` in a pitched 2D float buffer.
#define CTCS2_V_ROW(ptr, pitch, row) \
    ((__global float*) ((__global char*) (ptr) + (pitch)*(row)))

__kernel void computeVKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r1_, //< Inter-layer friction coefficient
        float r2_, //< Bottom friction coefficient

        //Numerical diffusion
        float A_,

        //Density of each layer
        float rho1_,
        float rho2_,

        //Data for layer 1
        __global float* H1_ptr_, int H1_pitch_,
        __global float* eta1_1_ptr_, int eta1_1_pitch_, // eta^n
        __global float* U1_1_ptr_, int U1_1_pitch_, // U^n
        __global float* V1_0_ptr_, int V1_0_pitch_, // V^n-1, also output V^n+1
        __global float* V1_1_ptr_, int V1_1_pitch_, // V^n

        //Data for layer 2
        __global float* H2_ptr_, int H2_pitch_,
        __global float* eta2_1_ptr_, int eta2_1_pitch_,
        __global float* U2_1_ptr_, int U2_1_pitch_,
        __global float* V2_0_ptr_, int V2_0_pitch_,
        __global float* V2_1_ptr_, int V2_1_pitch_,

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Local-memory tiles, layer 1
    eta_shmem H1_shared;
    eta_shmem eta1_shared;
    u_shmem U1_shared;
    v_shmem V1_shared;

    //Local-memory tiles, layer 2
    eta_shmem H2_shared;
    eta_shmem eta2_shared;
    u_shmem U2_shared;
    v_shmem V2_shared;

    //Position of this work-item inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //First cell handled by this work-group (the +1 skips the global ghost cells)
    const int bx = get_local_size(0) * get_group_id(0) + 1;
    const int by = get_local_size(1) * get_group_id(1) + 1;

    //Global cell index of this work-item
    const int ti = bx + tx;
    const int tj = by + ty;

    //Only these cells are read from / written to the V^{n-1} buffers
    const int is_interior = (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_);

    //Row of the in/out V buffers owned by this work-item
    __global float* const V1_0_row = CTCS2_V_ROW(V1_0_ptr_, V1_0_pitch_, tj);
    __global float* const V2_0_row = CTCS2_V_ROW(V2_0_ptr_, V2_0_pitch_, tj);

    //V^{n-1}; zero for work-items outside the interior (no memory access then)
    const float V1_0 = is_interior ? V1_0_row[ti] : 0.0f;
    const float V2_0 = is_interior ? V2_0_row[ti] : 0.0f;

    //Stage H, eta and U in local memory. All three use block_height+1 rows
    //and the same row clamp, so they share the outer loop.
    for (int row=ty; row<block_height+1; row+=get_local_size(1)) {
        // "fake" global ghost cells by clamping the row index
        const int gy = clamp(by + row, 1, ny_);

        __global float* const H1_row = CTCS2_V_ROW(H1_ptr_, H1_pitch_, gy);
        __global float* const H2_row = CTCS2_V_ROW(H2_ptr_, H2_pitch_, gy);
        __global float* const eta1_1_row = CTCS2_V_ROW(eta1_1_ptr_, eta1_1_pitch_, gy);
        __global float* const eta2_1_row = CTCS2_V_ROW(eta2_1_ptr_, eta2_1_pitch_, gy);
        __global float* const U1_1_row = CTCS2_V_ROW(U1_1_ptr_, U1_1_pitch_, gy);
        __global float* const U2_1_row = CTCS2_V_ROW(U2_1_ptr_, U2_1_pitch_, gy);

        //H and eta: block_width+2 columns, ghost columns faked by clamping to [1, nx_]
        for (int col=tx; col<block_width+2; col+=get_local_size(0)) {
            const int gx = clamp(bx + col - 1, 1, nx_);

            H1_shared[row][col] = H1_row[gx];
            H2_shared[row][col] = H2_row[gx];

            eta1_shared[row][col] = eta1_1_row[gx];
            eta2_shared[row][col] = eta2_1_row[gx];
        }

        //U: block_width+1 columns, clamped to [0, nx_] to prevent out-of-bounds reads
        for (int col=tx; col<block_width+1; col+=get_local_size(0)) {
            const int gx = clamp(bx + col - 1, 0, nx_);

            U1_shared[row][col] = U1_1_row[gx];
            U2_shared[row][col] = U2_1_row[gx];
        }
    }

    //Stage V^n in local memory: (block_width+2)*(block_height+2) values
    for (int row=ty; row<block_height+2; row+=get_local_size(1)) {
        // Prevent out-of-bounds
        const int gy = clamp(by + row - 1, 0, ny_);

        __global float* const V1_1_row = CTCS2_V_ROW(V1_1_ptr_, V1_1_pitch_, gy);
        __global float* const V2_1_row = CTCS2_V_ROW(V2_1_ptr_, V2_1_pitch_, gy);

        for (int col=tx; col<block_width+2; col+=get_local_size(0)) {
            // "fake" ghost cells by clamping
            const int gx = clamp(bx + col - 1, 1, nx_);

            V1_shared[row][col] = V1_1_row[gx];
            V2_shared[row][col] = V2_1_row[gx];
        }
    }

    //Every tile must be complete before any work-item reads its stencil
    barrier(CLK_LOCAL_MEM_FENCE);

    /**
      * Stencil short-hands. Notation:
      *   Var_00 is var_i,j
      *   Var_p0 is var_i+1,j
      *   Var_0m is var_i,j-1
      *   etc
      */
    //Layer 1 (top)
    const float V1_00 = V1_shared[ty+1][tx+1]; //V at "center"
    const float V1_0p = V1_shared[ty+2][tx+1]; //V at "north"
    const float V1_0m = V1_shared[ty  ][tx+1]; //V at "south"
    const float V1_p0 = V1_shared[ty+1][tx+2]; //V at "east"
    const float V1_m0 = V1_shared[ty+1][tx  ]; //V at "west"

    const float U1_00 = U1_shared[ty  ][tx+1];
    const float U1_0p = U1_shared[ty+1][tx+1];
    const float U1_m0 = U1_shared[ty  ][tx  ];
    const float U1_mp = U1_shared[ty+1][tx  ];

    const float H1_m0 = H1_shared[ty  ][tx  ];
    const float H1_00 = H1_shared[ty  ][tx+1];
    const float H1_p0 = H1_shared[ty  ][tx+2];
    const float H1_mp = H1_shared[ty+1][tx  ];
    const float H1_0p = H1_shared[ty+1][tx+1];
    const float H1_pp = H1_shared[ty+1][tx+2];

    const float eta1_m0 = eta1_shared[ty  ][tx  ];
    const float eta1_00 = eta1_shared[ty  ][tx+1];
    const float eta1_p0 = eta1_shared[ty  ][tx+2];
    const float eta1_mp = eta1_shared[ty+1][tx  ];
    const float eta1_0p = eta1_shared[ty+1][tx+1];
    const float eta1_pp = eta1_shared[ty+1][tx+2];

    //Layer 2 (bottom)
    const float V2_00 = V2_shared[ty+1][tx+1];
    const float V2_0p = V2_shared[ty+2][tx+1];
    const float V2_0m = V2_shared[ty  ][tx+1];
    const float V2_p0 = V2_shared[ty+1][tx+2];
    const float V2_m0 = V2_shared[ty+1][tx  ];

    const float U2_00 = U2_shared[ty  ][tx+1];
    const float U2_0p = U2_shared[ty+1][tx+1];
    const float U2_m0 = U2_shared[ty  ][tx  ];
    const float U2_mp = U2_shared[ty+1][tx  ];

    const float H2_m0 = H2_shared[ty  ][tx  ];
    const float H2_00 = H2_shared[ty  ][tx+1];
    const float H2_p0 = H2_shared[ty  ][tx+2];
    const float H2_mp = H2_shared[ty+1][tx  ];
    const float H2_0p = H2_shared[ty+1][tx+1];
    const float H2_pp = H2_shared[ty+1][tx+2];

    const float eta2_m0 = eta2_shared[ty  ][tx  ];
    const float eta2_00 = eta2_shared[ty  ][tx+1];
    const float eta2_p0 = eta2_shared[ty  ][tx+2];
    const float eta2_mp = eta2_shared[ty+1][tx  ];
    const float eta2_0p = eta2_shared[ty+1][tx+1];
    const float eta2_pp = eta2_shared[ty+1][tx+2];

    //Four-point averages of eta (west and east of the V point)
    const float eta1_bar_m0 = 0.25f*(eta1_m0 + eta1_mp + eta1_00 + eta1_0p);
    const float eta1_bar_00 = 0.25f*(eta1_00 + eta1_0p + eta1_p0 + eta1_pp);

    const float eta2_bar_m0 = 0.25f*(eta2_m0 + eta2_mp + eta2_00 + eta2_0p);
    const float eta2_bar_00 = 0.25f*(eta2_00 + eta2_0p + eta2_p0 + eta2_pp);

    //Four-point averages of H and the two-point average H_y at the V position
    const float H1_bar_m0 = 0.25f*(H1_m0 + H1_mp + H1_00 + H1_0p);
    const float H1_bar_00 = 0.25f*(H1_00 + H1_0p + H1_p0 + H1_pp);
    const float H1_y = 0.5f*(H1_00 + H1_0p);

    const float H2_bar_m0 = 0.25f*(H2_m0 + H2_mp + H2_00 + H2_0p);
    const float H2_bar_00 = 0.25f*(H2_00 + H2_0p + H2_p0 + H2_pp);
    const float H2_y = 0.5f*(H2_00 + H2_0p);

    //Thickness of the top layer: H1 + eta1 - eta2
    const float h1_0p = H1_0p + eta1_0p - eta2_0p;
    const float h1_00 = H1_00 + eta1_00 - eta2_00;
    const float h1_bar_00 = H1_bar_00 + eta1_bar_00 - eta2_bar_00;
    const float h1_bar_m0 = H1_bar_m0 + eta1_bar_m0 - eta2_bar_m0;

    //Thickness of the bottom layer: H2 + eta2
    const float h2_0p = H2_0p + eta2_0p;
    const float h2_00 = H2_00 + eta2_00;
    const float h2_bar_00 = H2_bar_00 + eta2_bar_00;
    const float h2_bar_m0 = H2_bar_m0 + eta2_bar_m0;

    //Layer thickness averaged to the V position
    const float h1_y = 0.5f*(h1_0p + h1_00);
    const float h2_y = 0.5f*(h2_0p + h2_00);

    //Alternative pressure formulation kept from the original source for reference:
    //const float epsilon = (rho2_ - rho1_)/rho2_;
    //const float P1_y = -0.5f*g_*(h1_0p + h1_00) * (eta1_0p - eta1_00 + h2_0p - h2_00) * (1.0f - epsilon);
    //const float P2_y = -0.5f*g_*(h2_0p + h2_00) * (eta2_0p - eta2_00 + H2_0p - H2_00);

    //Pressure term, top layer
    const float P1_y = -g_*h1_y*(eta1_0p - eta1_00) - 0.5f*g_*(eta1_0p*eta1_0p - eta1_00*eta1_00);

    //Pressure term, bottom layer: contribution driven by eta1 (weighted by
    //rho1/rho2) plus contribution driven by eta2 (weighted by (rho2-rho1)/rho2)
    const float P2_from_top = h2_y*(eta1_0p - eta1_00) + 0.5f*(eta1_0p*eta1_0p - eta1_00*eta1_00);
    const float P2_from_bottom = h2_y*(eta2_0p - eta2_00) + 0.5f*(eta2_0p*eta2_0p - eta2_00*eta2_00);
    const float P2_y = -g_ * (rho1_/rho2_) * P2_from_top
                       -g_ * ((rho2_ - rho1_)/rho2_) * P2_from_bottom;

    //U averaged to the V position (for the Coriolis term)
    const float U1_bar = 0.25f*(U1_m0 + U1_00 + U1_mp + U1_0p);
    const float U2_bar = 0.25f*(U2_m0 + U2_00 + U2_mp + U2_0p);

    //Friction coefficients
    //FIXME: Should this be h instead of H?
    const float C1 = r1_/H1_y;
    const float C2 = r2_/H2_y;

    //Numerical diffusion / subgrid energy loss coefficient
    const float D = 2.0f*A_*dt_*(dx_*dx_ + dy_*dy_)/(dx_*dx_*dy_*dy_);

    //Nonlinear (advection) terms, layer 1
    const float N1_a = (V1_0p + V1_00)*(V1_0p + V1_00) / (h1_0p);
    const float N1_b = (V1_00 + V1_0m)*(V1_00 + V1_0m) / (h1_00);
    const float N1_c = (U1_0p + U1_00)*(V1_p0 + V1_00) / (h1_bar_00);
    const float N1_d = (U1_mp + U1_m0)*(V1_00 + V1_m0) / (h1_bar_m0);
    const float N1 = 0.25f*( N1_a - N1_b + (dy_/dx_)*(N1_c - N1_d) );

    //Nonlinear (advection) terms, layer 2
    const float N2_a = (V2_0p + V2_00)*(V2_0p + V2_00) / (h2_0p);
    const float N2_b = (V2_00 + V2_0m)*(V2_00 + V2_0m) / (h2_00);
    const float N2_c = (U2_0p + U2_00)*(V2_p0 + V2_00) / (h2_bar_00);
    const float N2_d = (U2_mp + U2_m0)*(V2_00 + V2_m0) / (h2_bar_m0);
    const float N2 = 0.25f*( N2_a - N2_b + (dy_/dx_)*(N2_c - N2_d) );

    //Eddy viscosity terms; the centre value is V^{n-1}, the neighbours are V^n
    const float E1 = (V1_p0 - V1_0 + V1_m0)/(dx_*dx_) + (V1_0p - V1_0 + V1_0m)/(dy_*dy_);
    const float E2 = (V2_p0 - V2_0 + V2_m0)/(dx_*dx_) + (V2_0p - V2_0 + V2_0m)/(dy_*dy_);

    //Wind shear stress; only applied to the top layer, using its density
    const float Y = windStressY(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho1_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //V at the next timestep.
    //NOTE(review): both layers use C1 for the inter-layer coupling term and only
    //the bottom layer has a friction term (2*dt*C2) in its denominator; this is
    //reproduced exactly from the original - confirm against the scheme.
    const float V1_2 = (V1_0 + 2.0f*dt_*(-f_*U1_bar + (N1 + P1_y)/dy_ + Y + C1*V2_0 + A_*E1) ) / (1.0f + D);
    const float V2_2 = (V2_0 + 2.0f*dt_*(-f_*U2_bar + (N2 + P2_y)/dy_ + C1*V1_0 + A_*E2) ) / (1.0f + 2.0f*dt_*C2 + D);

    //Write to main memory for internal cells
    if (is_interior) {
        V1_0_row[ti] = V1_2;
        V2_0_row[ti] = V2_2;
    }
}

#undef CTCS2_V_ROW
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
128
SWESimulators/CTCS2Layer_eta_kernel.opencl
Normal file
128
SWESimulators/CTCS2Layer_eta_kernel.opencl
Normal file
@@ -0,0 +1,128 @@
|
||||
/**
|
||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
||||
described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
  * Tile size (in cells) handled by one work-group.
  *
  * The values default to 8x8 but may be overridden at build time
  * (e.g. -D block_width=16). computeEtaKernel indexes the tiles below with
  * the local work-item id, so the work-group size used at launch must not
  * exceed block_width x block_height.
  */
#ifndef block_height
#define block_height 8
#endif

#ifndef block_width
#define block_width 8
#endif

// Local-memory tile for U: one extra column compared to the cell tile.
typedef __local float u_shmem[block_height][block_width+1];

// Local-memory tile for V: one extra row compared to the cell tile.
typedef __local float v_shmem[block_height+1][block_width];
|
||||
|
||||
|
||||
/**
  * Kernel that evolves eta of both layers one step in time.
  *
  * For every cell selected by the interior test, eta^{n-1} is read from
  * eta1_0/eta2_0 and overwritten with eta^{n+1}:
  *   eta1 changes by the divergence of (U1 + U2, V1 + V2),
  *   eta2 changes by the divergence of (U2, V2),
  * both scaled by 2*dt (leapfrog step).
  *
  * All 2D buffers are pitched; the pitch is a byte offset between rows.
  */

// Pointer to the first element of row `row` in a pitched 2D float buffer.
#define CTCS2_ETA_ROW(ptr, pitch, row) \
    ((__global float*) ((__global char*) (ptr) + (pitch)*(row)))

__kernel void computeEtaKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Data for layer 1
        __global float* eta1_0_ptr_, int eta1_0_pitch_, //eta_1^n-1 (also used as output, that is eta_1^n+1)
        __global float* U1_1_ptr_, int U1_1_pitch_, // U^n
        __global float* V1_1_ptr_, int V1_1_pitch_, // V^n

        //Data for layer 2
        __global float* eta2_0_ptr_, int eta2_0_pitch_, //eta_2^n-1 (also used as output, that is eta_2^n+1)
        __global float* U2_1_ptr_, int U2_1_pitch_, // U^n
        __global float* V2_1_ptr_, int V2_1_pitch_ // V^n
        ) {

    //Position of this work-item inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //First cell handled by this work-group (the +1 skips the global ghost cells)
    const int bx = get_local_size(0) * get_group_id(0) + 1;
    const int by = get_local_size(1) * get_group_id(1) + 1;

    //Global cell index of this work-item
    const int ti = bx + tx;
    const int tj = by + ty;

    //Local-memory tiles, layer 1
    u_shmem U1_1_shared;
    v_shmem V1_1_shared;

    //Local-memory tiles, layer 2
    u_shmem U2_1_shared;
    v_shmem V2_1_shared;

    //Only these cells are read from / written to the eta buffers
    const int is_interior = (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1);

    //Row of the in/out eta buffers owned by this work-item
    __global float* const eta1_0_row = CTCS2_ETA_ROW(eta1_0_ptr_, eta1_0_pitch_, tj);
    __global float* const eta2_0_row = CTCS2_ETA_ROW(eta2_0_ptr_, eta2_0_pitch_, tj);

    //eta^{n-1}; zero for work-items outside the interior (no memory access then)
    const float eta1_0 = is_interior ? eta1_0_row[ti] : 0.0f;
    const float eta2_0 = is_interior ? eta2_0_row[ti] : 0.0f;

    //Stage U in local memory: block_height rows, block_width+1 columns
    for (int row=ty; row<block_height; row+=get_local_size(1)) {
        const int gy = clamp(by + row, 1, ny_); // fake ghost cells

        __global float* const U1_1_row = CTCS2_ETA_ROW(U1_1_ptr_, U1_1_pitch_, gy);
        __global float* const U2_1_row = CTCS2_ETA_ROW(U2_1_ptr_, U2_1_pitch_, gy);

        for (int col=tx; col<block_width+1; col+=get_local_size(0)) {
            const int gx = clamp(bx + col - 1, 0, nx_); // prevent out of bounds

            U1_1_shared[row][col] = U1_1_row[gx];
            U2_1_shared[row][col] = U2_1_row[gx];
        }
    }

    //Stage V in local memory: block_height+1 rows, block_width columns
    for (int row=ty; row<block_height+1; row+=get_local_size(1)) {
        const int gy = clamp(by + row - 1, 0, ny_); // prevent out of bounds

        __global float* const V1_1_row = CTCS2_ETA_ROW(V1_1_ptr_, V1_1_pitch_, gy);
        __global float* const V2_1_row = CTCS2_ETA_ROW(V2_1_ptr_, V2_1_pitch_, gy);

        for (int col=tx; col<block_width; col+=get_local_size(0)) {
            const int gx = clamp(bx + col, 1, nx_); // fake ghost cells

            V1_1_shared[row][col] = V1_1_row[gx];
            V2_1_shared[row][col] = V2_1_row[gx];
        }
    }

    //Every tile must be complete before any work-item reads its stencil
    barrier(CLK_LOCAL_MEM_FENCE);

    //Momentum on the four faces of this cell, per layer
    const float U1_east  = U1_1_shared[ty][tx+1];
    const float U1_west  = U1_1_shared[ty][tx];
    const float U2_east  = U2_1_shared[ty][tx+1];
    const float U2_west  = U2_1_shared[ty][tx];

    const float V1_north = V1_1_shared[ty+1][tx];
    const float V1_south = V1_1_shared[ty][tx];
    const float V2_north = V2_1_shared[ty+1][tx];
    const float V2_south = V2_1_shared[ty][tx];

    //eta at the next timestep. The upper interface sees the flux of both
    //layers, the lower interface only that of layer 2.
    const float eta1_2 = eta1_0 - 2.0f*dt_/dx_ * (U1_east - U1_west + U2_east - U2_west)
                                - 2.0f*dt_/dy_ * (V1_north - V1_south + V2_north - V2_south);
    const float eta2_2 = eta2_0 - 2.0f*dt_/dx_ * (U2_east - U2_west)
                                - 2.0f*dt_/dy_ * (V2_north - V2_south);

    //Write to main memory
    if (is_interior) {
        eta1_0_row[ti] = eta1_2;
        eta2_0_row[ti] = eta2_2;
    }
}

#undef CTCS2_ETA_ROW
|
||||
218
SWESimulators/CTCS_U_kernel.opencl
Normal file
218
SWESimulators/CTCS_U_kernel.opencl
Normal file
@@ -0,0 +1,218 @@
|
||||
/**
|
||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
||||
described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
/**
  * Kernel that evolves U (x-momentum) one step in time with the CTCS
  * (leapfrog) scheme.
  *
  * For every cell selected by the interior test, U^{n-1} is read from U0
  * and overwritten with U^{n+1}, computed from H, eta^n, U^n and V^n.
  *
  * All 2D buffers are pitched; the pitch is a byte offset between rows.
  * H is the depth field that is added to eta to obtain the total water
  * column h = H + eta.
  */
__kernel void computeUKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Numerical diffusion
        float A_,

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* eta1_ptr_, int eta1_pitch_, // eta^n
        __global float* U0_ptr_, int U0_pitch_, // U^n-1, also output, U^n+1
        __global float* U1_ptr_, int U1_pitch_, // U^n
        __global float* V1_ptr_, int V1_pitch_, // V^n

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    __local float H_shared[block_height+2][block_width+1];
    __local float eta1_shared[block_height+2][block_width+1];
    __local float U1_shared[block_height+2][block_width+2];
    __local float V1_shared[block_height+1][block_width+1];

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Start of block within domain
    const int bx = get_local_size(0) * get_group_id(0) + 1; //Skip global ghost cells
    const int by = get_local_size(1) * get_group_id(1) + 1; //Skip global ghost cells

    //Index of cell within domain
    const int ti = bx + tx;
    const int tj = by + ty;

    //Compute pointer to current row in the U array
    __global float* const U0_row = (__global float*) ((__global char*) U0_ptr_ + U0_pitch_*tj);

    //Read current U (U^{n-1}); stays zero for work-items outside the interior
    float U0 = 0.0f;
    if (ti > 0 && ti < nx_ && tj > 0 && tj < ny_+1) {
        U0 = U0_row[ti];
    }

    //Read H and eta into shared memory: (block_width+1)*(block_height+2) cells
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // "fake" global ghost cells by clamping
        const int l = clamp(by + j - 1, 1, ny_);

        //Compute the pointer to current row in the H and eta arrays
        __global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
        __global float* const eta1_row = (__global float*) ((__global char*) eta1_ptr_ + eta1_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // "fake" global ghost cells by clamping
            const int k = clamp(bx + i, 1, nx_);

            H_shared[j][i] = H_row[k];
            eta1_shared[j][i] = eta1_row[k];
        }
    }

    //Read U into shared memory: (block_width+2)*(block_height+2) cells
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // "fake" ghost cells by clamping
        const int l = clamp(by + j - 1, 1, ny_);

        //Compute the pointer to current row in the U array
        __global float* const U1_row = (__global float*) ((__global char*) U1_ptr_ + U1_pitch_*l);

        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            // Prevent out-of-bounds
            const int k = clamp(bx + i - 1, 0, nx_);

            U1_shared[j][i] = U1_row[k];
        }
    }

    //Read V into shared memory: (block_width+1)*(block_height+1) cells
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        // Prevent out-of-bounds
        const int l = clamp(by + j - 1, 0, ny_);

        //Compute the pointer to current row in the V array
        __global float* const V1_row = (__global float*) ((__global char*) V1_ptr_ + V1_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // "fake" ghost cells by clamping
            const int k = clamp(bx + i, 1, nx_);

            V1_shared[j][i] = V1_row[k];
        }
    }

    //Make sure all threads have read into shared mem
    barrier(CLK_LOCAL_MEM_FENCE);

    /**
      * Now get all our required variables as short-hands
      * here we use the notation of
      *  Var_00 as var_i,j
      *  Var_p0 as var_i+1,j
      *  Var_0m as var_i,j-1
      *  etc
      */
    const float U_00 = U1_shared[ty+1][tx+1]; //U at "center"
    const float U_0p = U1_shared[ty+2][tx+1]; //U at "north"
    const float U_0m = U1_shared[ty  ][tx+1]; //U at "south"
    const float U_p0 = U1_shared[ty+1][tx+2]; //U at "east"
    const float U_m0 = U1_shared[ty+1][tx  ]; //U at "west"

    const float V_00 = V1_shared[ty+1][tx  ];
    const float V_p0 = V1_shared[ty+1][tx+1];
    const float V_0m = V1_shared[ty  ][tx  ];
    const float V_pm = V1_shared[ty  ][tx+1];

    const float H_0m = H_shared[ty  ][tx  ];
    const float H_00 = H_shared[ty+1][tx  ];
    const float H_0p = H_shared[ty+2][tx  ];
    const float H_pm = H_shared[ty  ][tx+1];
    const float H_p0 = H_shared[ty+1][tx+1];
    const float H_pp = H_shared[ty+2][tx+1];

    const float eta_0m = eta1_shared[ty  ][tx  ];
    const float eta_00 = eta1_shared[ty+1][tx  ];
    const float eta_0p = eta1_shared[ty+2][tx  ];
    const float eta_pm = eta1_shared[ty  ][tx+1];
    const float eta_p0 = eta1_shared[ty+1][tx+1];
    const float eta_pp = eta1_shared[ty+2][tx+1];

    //Reconstruct H_bar and H_x (at the U position)
    const float H_bar_0m = 0.25f*(H_0m + H_pm + H_00 + H_p0);
    const float H_bar_00 = 0.25f*(H_00 + H_p0 + H_0p + H_pp);
    const float H_x = 0.5f*(H_00 + H_p0);

    //Reconstruct eta_bar (four-point averages south and north of the U position)
    const float eta_bar_0m = 0.25f*(eta_0m + eta_pm + eta_00 + eta_p0);
    const float eta_bar_00 = 0.25f*(eta_00 + eta_p0 + eta_0p + eta_pp);

    //Reconstruct V at the U position
    const float V_bar = 0.25f*(V_0m + V_00 + V_pm + V_p0);

    //Calculate the friction coefficient (denominator of the update).
    //Single-precision literals keep the whole expression in float; the
    //former double literal promoted the sums to double precision.
    const float C = 1.0f + 2.0f*r_*dt_/H_x + 2.0f*A_*dt_*(dx_*dx_ + dy_*dy_)/(dx_*dx_*dy_*dy_);

    //Calculate the pressure/gravitational effect
    const float h_p0 = H_p0 + eta_p0;
    const float h_00 = H_00 + eta_00;
    const float h_x = 0.5f*(h_00 + h_p0); //Could possibly use h for pressure terms instead of H
    const float P_x_hat = -0.5f*g_*(eta_p0*eta_p0 - eta_00*eta_00);
    const float P_x = -g_*h_x*(eta_p0 - eta_00) + P_x_hat;

    //Calculate nonlinear effects
    const float N_a = (U_p0 + U_00)*(U_p0 + U_00) / h_p0;
    const float N_b = (U_00 + U_m0)*(U_00 + U_m0) / h_00;
    const float N_c = (U_0p + U_00)*(V_p0 + V_00) / (H_bar_00 + eta_bar_00);
    const float N_d = (U_00 + U_0m)*(V_pm + V_0m) / (H_bar_0m + eta_bar_0m);
    const float N = 0.25f*( N_a - N_b + (dx_/dy_)*(N_c - N_d) );

    //Calculate eddy viscosity term; the centre value is U^{n-1}, the neighbours are U^n
    const float E = (U_p0 - U0 + U_m0)/(dx_*dx_) + (U_0p - U0 + U_0m)/(dy_*dy_);

    //Calculate the wind shear stress
    const float X = windStressX(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute the U at the next timestep
    const float U2 = (U0 + 2.0f*dt_*(f_*V_bar + (N + P_x)/dx_ + X + A_*E) ) / C;

    //Write to main memory for internal cells
    if (ti > 0 && ti < nx_ && tj > 0 && tj < ny_+1) {
        U0_row[ti] = U2;
    }
}
|
||||
222
SWESimulators/CTCS_V_kernel.opencl
Normal file
222
SWESimulators/CTCS_V_kernel.opencl
Normal file
@@ -0,0 +1,222 @@
|
||||
/**
|
||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
||||
described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
|
||||
/**
  * Kernel that evolves V (y-momentum) one step in time with the CTCS
  * (leapfrog) scheme.
  *
  * For every cell selected by the interior test, V^{n-1} is read from V0
  * and overwritten with V^{n+1}, computed from H, eta^n, U^n and V^n.
  *
  * All 2D buffers are pitched; the pitch is a byte offset between rows.
  * H is the depth field that is added to eta to obtain the total water
  * column h = H + eta.
  */
__kernel void computeVKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Numerical diffusion
        float A_,

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* eta1_ptr_, int eta1_pitch_, // eta^n
        __global float* U1_ptr_, int U1_pitch_, // U^n
        __global float* V0_ptr_, int V0_pitch_, // V^n-1, also output V^n+1
        __global float* V1_ptr_, int V1_pitch_, // V^n

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    __local float H_shared[block_height+1][block_width+2];
    __local float eta1_shared[block_height+1][block_width+2];
    __local float U1_shared[block_height+1][block_width+1];
    __local float V1_shared[block_height+2][block_width+2];

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Start of block within domain
    const int bx = get_local_size(0) * get_group_id(0) + 1; //Skip global ghost cells
    const int by = get_local_size(1) * get_group_id(1) + 1; //Skip global ghost cells

    //Index of cell within domain
    const int ti = bx + tx;
    const int tj = by + ty;

    //Compute pointer to current row in the V array
    __global float* const V0_row = (__global float*) ((__global char*) V0_ptr_ + V0_pitch_*tj);

    //Read current V (V^{n-1}); stays zero for work-items outside the interior
    float V0 = 0.0f;
    if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_) {
        V0 = V0_row[ti];
    }

    //Read H and eta into shared memory: (block_width+2)*(block_height+1) cells
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        // "fake" global ghost cells by clamping
        const int l = clamp(by + j, 1, ny_);

        //Compute the pointer to current row in the H and eta arrays
        __global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
        __global float* const eta1_row = (__global float*) ((__global char*) eta1_ptr_ + eta1_pitch_*l);

        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            // "fake" global ghost cells by clamping
            const int k = clamp(bx + i - 1, 1, nx_);

            H_shared[j][i] = H_row[k];
            eta1_shared[j][i] = eta1_row[k];
        }
    }

    //Read U into shared memory: (block_width+1)*(block_height+1) cells
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        // "fake" ghost cells by clamping
        const int l = clamp(by + j, 1, ny_);

        //Compute the pointer to current row in the U array
        __global float* const U1_row = (__global float*) ((__global char*) U1_ptr_ + U1_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // Prevent out-of-bounds
            const int k = clamp(bx + i - 1, 0, nx_);

            U1_shared[j][i] = U1_row[k];
        }
    }

    //Read V into shared memory: (block_width+2)*(block_height+2) cells
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // Prevent out-of-bounds
        const int l = clamp(by + j - 1, 0, ny_);

        //Compute the pointer to current row in the V array
        __global float* const V1_row = (__global float*) ((__global char*) V1_ptr_ + V1_pitch_*l);

        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            // "fake" ghost cells by clamping
            const int k = clamp(bx + i - 1, 1, nx_);

            V1_shared[j][i] = V1_row[k];
        }
    }

    //Make sure all threads have read into shared mem
    barrier(CLK_LOCAL_MEM_FENCE);

    /**
      * Now get all our required variables as short-hands
      * here we use the notation of
      *  Var_00 as var_i,j
      *  Var_p0 as var_i+1,j
      *  Var_0m as var_i,j-1
      *  etc
      */
    const float V_00 = V1_shared[ty+1][tx+1]; //V at "center"
    const float V_0p = V1_shared[ty+2][tx+1]; //V at "north"
    const float V_0m = V1_shared[ty  ][tx+1]; //V at "south"
    const float V_p0 = V1_shared[ty+1][tx+2]; //V at "east"
    const float V_m0 = V1_shared[ty+1][tx  ]; //V at "west"

    const float U_00 = U1_shared[ty  ][tx+1];
    const float U_0p = U1_shared[ty+1][tx+1];
    const float U_m0 = U1_shared[ty  ][tx  ];
    const float U_mp = U1_shared[ty+1][tx  ];

    const float H_m0 = H_shared[ty  ][tx  ];
    const float H_00 = H_shared[ty  ][tx+1];
    const float H_p0 = H_shared[ty  ][tx+2];
    const float H_mp = H_shared[ty+1][tx  ];
    const float H_0p = H_shared[ty+1][tx+1];
    const float H_pp = H_shared[ty+1][tx+2];

    const float eta_m0 = eta1_shared[ty  ][tx  ];
    const float eta_00 = eta1_shared[ty  ][tx+1];
    const float eta_p0 = eta1_shared[ty  ][tx+2];
    const float eta_mp = eta1_shared[ty+1][tx  ];
    const float eta_0p = eta1_shared[ty+1][tx+1];
    const float eta_pp = eta1_shared[ty+1][tx+2];

    //Reconstruct H_bar and H_y (at the V position)
    const float H_bar_m0 = 0.25f*(H_m0 + H_mp + H_00 + H_0p);
    const float H_bar_00 = 0.25f*(H_00 + H_0p + H_p0 + H_pp);
    const float H_y = 0.5f*(H_00 + H_0p);

    //Reconstruct eta_bar (four-point averages west and east of the V position)
    const float eta_bar_m0 = 0.25f*(eta_m0 + eta_mp + eta_00 + eta_0p);
    const float eta_bar_00 = 0.25f*(eta_00 + eta_0p + eta_p0 + eta_pp);

    //Reconstruct U at the V position
    const float U_bar = 0.25f*(U_m0 + U_00 + U_mp + U_0p);

    //Calculate the friction coefficient (denominator of the update).
    //Single-precision literals keep the whole expression in float; the
    //former double literal promoted the sums to double precision.
    const float C = 1.0f + 2.0f*r_*dt_/H_y + 2.0f*A_*dt_*(dx_*dx_ + dy_*dy_)/(dx_*dx_*dy_*dy_);

    //Calculate the pressure/gravitational effect
    const float h_0p = H_0p + eta_0p;
    const float h_00 = H_00 + eta_00;
    const float h_y = 0.5f*(h_00 + h_0p); //Could possibly use h for pressure terms instead of H
    const float P_y_hat = -0.5f*g_*(eta_0p*eta_0p - eta_00*eta_00);
    const float P_y = -g_*h_y*(eta_0p - eta_00) + P_y_hat;

    //Calculate nonlinear effects
    const float N_a = (V_0p + V_00)*(V_0p + V_00) / h_0p;
    const float N_b = (V_00 + V_0m)*(V_00 + V_0m) / h_00;
    const float N_c = (U_0p + U_00)*(V_p0 + V_00) / (H_bar_00 + eta_bar_00);
    const float N_d = (U_mp + U_m0)*(V_00 + V_m0) / (H_bar_m0 + eta_bar_m0);
    const float N = 0.25f*( N_a - N_b + (dy_/dx_)*(N_c - N_d) );

    //Calculate eddy viscosity term; the centre value is V^{n-1}, the neighbours are V^n
    const float E = (V_p0 - V0 + V_m0)/(dx_*dx_) + (V_0p - V0 + V_0m)/(dy_*dy_);

    //Calculate the wind shear stress
    const float Y = windStressY(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute the V at the next timestep
    const float V2 = (V0 + 2.0f*dt_*(-f_*U_bar + (N + P_y)/dy_ + Y + A_*E) ) / C;

    //Write to main memory for internal cells
    if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_) {
        V0_row[ti] = V2;
    }
}
|
||||
109
SWESimulators/CTCS_eta_kernel.opencl
Normal file
109
SWESimulators/CTCS_eta_kernel.opencl
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
||||
described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Kernel that evolves eta one step in time using the centered in time,
 * centered in space (leapfrog) update
 *
 *   eta^{n+1} = eta^{n-1} - 2*dt/dx * (U_east - U_west)
 *                         - 2*dt/dy * (V_north - V_south)
 *
 * with U and V taken at time level n. The result overwrites the eta^{n-1}
 * buffer. All pitches are given in bytes.
 *
 * g_, f_ and r_ are part of the argument list but are not used here.
 */
__kernel void computeEtaKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Data
        __global float* eta0_ptr_, int eta0_pitch_, //eta^n-1 (also used as output, that is eta^n+1)
        __global float* U1_ptr_, int U1_pitch_, // U^n
        __global float* V1_ptr_, int V1_pitch_ // V^n
        ) {

    //Local-memory tiles with the fluxes on the faces of this block's cells
    __local float U1_shared[block_height][block_width+1];
    __local float V1_shared[block_height+1][block_width];

    //Position of this thread inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //First cell of this work-group; the +1 skips the global ghost cells
    const int bx = get_local_size(0) * get_group_id(0) + 1;
    const int by = get_local_size(1) * get_group_id(1) + 1;

    //Cell handled by this thread
    const int ti = bx + tx;
    const int tj = by + ty;

    //Only threads that map to an interior cell read or write eta
    const bool is_interior = (ti > 0) && (ti < nx_+1) && (tj > 0) && (tj < ny_+1);

    //Pointer to row tj of the eta array
    __global float* eta0_row = (__global float*) ((__global char*) eta0_ptr_ + eta0_pitch_*tj);

    //eta^{n-1} of this cell (left at zero for threads outside the interior)
    const float eta0 = is_interior ? eta0_row[ti] : 0.0f;

    //Cooperatively load U: tile column si holds U at global column bx+si-1
    for (int sj = ty; sj < block_height; sj += get_local_size(1)) {
        const int gj = clamp(by + sj, 1, ny_); // fake ghost cells

        //Pointer to row gj of the U array
        __global float* const U1_row = (__global float*) ((__global char*) U1_ptr_ + U1_pitch_*gj);

        for (int si = tx; si < block_width+1; si += get_local_size(0)) {
            const int gi = clamp(bx + si - 1, 0, nx_); // prevent out of bounds
            U1_shared[sj][si] = U1_row[gi];
        }
    }

    //Cooperatively load V: tile row sj holds V at global row by+sj-1
    for (int sj = ty; sj < block_height+1; sj += get_local_size(1)) {
        const int gj = clamp(by + sj - 1, 0, ny_); // prevent out of bounds

        //Pointer to row gj of the V array
        __global float* const V1_row = (__global float*) ((__global char*) V1_ptr_ + V1_pitch_*gj);

        for (int si = tx; si < block_width; si += get_local_size(0)) {
            const int gi = clamp(bx + si, 1, nx_); // fake ghost cells
            V1_shared[sj][si] = V1_row[gi];
        }
    }

    //All loads must be visible before any thread reads the tiles
    barrier(CLK_LOCAL_MEM_FENCE);

    //Flux differences across this cell
    const float dU = U1_shared[ty][tx+1] - U1_shared[ty][tx];
    const float dV = V1_shared[ty+1][tx] - V1_shared[ty][tx];

    //eta at the next timestep
    const float eta2 = eta0 - 2.0f*dt_/dx_ * dU
                            - 2.0f*dt_/dy_ * dV;

    //Write the result back for interior cells only
    if (is_interior) {
        eta0_row[ti] = eta2;
    }
}
|
||||
288
SWESimulators/Common.py
Normal file
288
SWESimulators/Common.py
Normal file
@@ -0,0 +1,288 @@
|
||||
import pyopencl
|
||||
import os
|
||||
import numpy as np
|
||||
|
||||
"""
|
||||
Static function which reads a text file and creates an OpenCL kernel from that
|
||||
"""
|
||||
def get_kernel(cl_ctx, kernel_filename, block_width, block_height):
    """
    Reads an OpenCL source file located next to this module and builds it.

    cl_ctx: OpenCL context to build for (must contain exactly one device)
    kernel_filename: Name of the kernel source file, relative to this module's directory
    block_width: Value of the block_width macro prepended to the source
    block_height: Value of the block_height macro prepended to the source

    Returns the built pyopencl program.
    """
    import datetime

    #Create define string
    define_string = "#define block_width " + str(block_width) + "\n"
    define_string += "#define block_height " + str(block_height) + "\n\n"
    #The timestamp makes the source text differ between calls; judging by the
    #macro name this is meant to force a recompilation every time
    define_string += "#ifndef my_variable_to_force_recompilation\n"
    define_string += "#define my_variable_to_force_recompilation " + datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") + "\n"
    define_string += "#undef my_variable_to_force_recompilation \n"
    define_string += "#endif\n\n"

    def shellquote(s):
        """Quotes an include path the way the platform's CL compiler expects."""
        assert(cl_ctx.num_devices == 1)
        platform_name = cl_ctx.devices[0].get_info(pyopencl.device_info.PLATFORM).name
        platform_name = platform_name.upper()
        if ('INTEL' in platform_name):
            #Intel CL compiler doesn't like spaces in include paths. We have to escape them
            return '"' + s.replace(" ", "\\ ") + '"'
        elif ('NVIDIA' in platform_name):
            #NVIDIA doesn't like double quoted paths...
            return "'" + s + "'"
        #Any other platform: hand the path over unchanged. Previously this
        #fell through and returned None, which put None into the option list.
        return s

    module_path = os.path.dirname(os.path.realpath(__file__))
    module_path_escaped = shellquote(module_path)
    options = ['-I', module_path_escaped]

    #Read the proper program
    fullpath = os.path.join(module_path, kernel_filename)
    with open(fullpath, "r") as kernel_file:
        kernel_string = define_string + kernel_file.read()
        kernel = pyopencl.Program(cl_ctx, kernel_string).build(options)

    return kernel
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that holds data
|
||||
"""
|
||||
class OpenCLArray2D:
    """
    A 2D float32 array stored in an OpenCL buffer, including its halo cells.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, data):
        """
        Uploads initial data to the CL device

        cl_ctx: OpenCL context the buffer is created in
        nx, ny: Number of cells along x and y, excluding the halo
        halo_x, halo_y: Number of halo cells on each side along x and y
        data: numpy array of shape (ny + 2*halo_y, nx + 2*halo_x)
        """
        host_data = self.convert_to_float32(data)

        self.nx = nx
        self.ny = ny
        self.nx_halo = nx + 2*halo_x
        self.ny_halo = ny + 2*halo_y

        #The conversion never changes the shape, so one check covers both arrays
        assert(host_data.shape == (self.ny_halo, self.nx_halo))

        #Upload data to the device
        mf = pyopencl.mem_flags
        self.data = pyopencl.Buffer(cl_ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=host_data)

        self.bytes_per_float = host_data.itemsize
        assert(self.bytes_per_float == 4)

        #Length of one row in bytes, in the type the kernels expect
        self.pitch = np.int32((self.nx_halo)*self.bytes_per_float)

    def download(self, cl_queue):
        """
        Enables downloading data from CL device to Python

        Returns a new float32 array of shape (ny_halo, nx_halo).
        """
        #Allocate data on the host for result
        host_data = np.empty((self.ny_halo, self.nx_halo), dtype=np.float32, order='C')

        #Copy data from device to host
        pyopencl.enqueue_copy(cl_queue, host_data, self.data)

        return host_data

    @staticmethod
    def convert_to_float32(data):
        """
        Converts to C-style float 32 array suitable for the GPU/OpenCL

        The input is returned unchanged when it already is a C-contiguous
        float32 array; otherwise a converted copy is returned.
        """
        #Checking C_CONTIGUOUS also catches strided views, which are neither
        #Fortran-ordered nor usable as a flat host buffer
        if (data.dtype != np.float32 or not data.flags['C_CONTIGUOUS']):
            print("Converting H0")
            return data.astype(np.float32, order='C')
        else:
            return data
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
||||
"""
|
||||
class SWEDataArkawaA:
    """
    Holds two sets of device arrays (h, hu, hv) on an unstaggered grid.

    This class was previously defined twice with identical bodies, so the
    second definition simply replaced the first; a single definition is kept.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """
        Uploads initial data to the CL device

        cl_ctx: OpenCL context the buffers are created in
        nx, ny: Number of cells along x and y, excluding the halo
        halo_x, halo_y: Number of halo cells on each side along x and y
        h0, hu0, hv0: Initial data; all three use the same shape
        """
        self.h0  = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0)
        self.hu0 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, hu0)
        self.hv0 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, hv0)

        #The second set is a separate group of buffers, also filled with the initial data
        self.h1  = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0)
        self.hu1 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, hu0)
        self.hv1 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, hv0)

    def swap(self):
        """
        Swaps the variables after a timestep has been completed
        """
        self.h1,  self.h0  = self.h0,  self.h1
        self.hu1, self.hu0 = self.hu0, self.hu1
        self.hv1, self.hv0 = self.hv0, self.hv1

    def download(self, cl_queue):
        """
        Enables downloading data from CL device to Python

        Returns the "0" set as a tuple (h, hu, hv).
        """
        h_cpu  = self.h0.download(cl_queue)
        hu_cpu = self.hu0.download(cl_queue)
        hv_cpu = self.hv0.download(cl_queue)

        return h_cpu, hu_cpu, hv_cpu
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
A class representing an Arakawa C type (staggered, u fluxes on east/west faces, v fluxes on north/south faces) grid
|
||||
We use h as cell centers
|
||||
"""
|
||||
class SWEDataArkawaC:
    """
    Holds two sets of device arrays (h, hu, hv) on a staggered grid.

    h has nx*ny cells plus halo, hu has one extra column and no halo along x,
    hv has one extra row and no halo along y.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """
        Uploads initial data to the CL device
        """
        #FIXME: This at least works for 0 and 1 ghost cells, but not convinced it generalizes
        assert(halo_x <= 1 and halo_y <= 1)

        #Both sets are created from the same initial data, set "0" first
        self.h0 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0)
        self.hu0 = OpenCLArray2D(cl_ctx, nx+1, ny, 0, halo_y, hu0)
        self.hv0 = OpenCLArray2D(cl_ctx, nx, ny+1, halo_x, 0, hv0)

        self.h1 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0)
        self.hu1 = OpenCLArray2D(cl_ctx, nx+1, ny, 0, halo_y, hu0)
        self.hv1 = OpenCLArray2D(cl_ctx, nx, ny+1, halo_x, 0, hv0)

    def swap(self):
        """
        Exchanges the "0" and "1" arrays after a timestep has been completed
        """
        #h is assumed to be constant (bottom topography really)
        for prefix in ("h", "hu", "hv"):
            first = getattr(self, prefix + "0")
            second = getattr(self, prefix + "1")
            setattr(self, prefix + "1", first)
            setattr(self, prefix + "0", second)

    def download(self, cl_queue):
        """
        Copies the "0" arrays from the CL device and returns them as (h, hu, hv)
        """
        return (self.h0.download(cl_queue),
                self.hu0.download(cl_queue),
                self.hv0.download(cl_queue))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class which represents different wind stresses
|
||||
"""
|
||||
class WindStressParams:
    """
    Wind stress configuration, stored in the numeric types passed to the kernels.

    type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
    tau0: Amplitude of wind stress (Pa)
    rho: Density of sea water (1025.0 kg / m^3)
    alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
    xm: Maximum wind stress for bell shaped wind stress
    Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
    x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
    y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
    u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
    v0: Translation speed along y for moving cyclone (-0.5*u0)
    """
    def __init__(self,
                 type=99, # "no wind" \
                 tau0=0, rho=0, alpha=0, xm=0, Rc=0, \
                 x0=0, y0=0, \
                 u0=0, v0=0):
        #The type selector is an integer, every other parameter a 32 bit float
        self.type = np.int32(type)

        float_params = (
            ("tau0", tau0), ("rho", rho), ("alpha", alpha), ("xm", xm), ("Rc", Rc),
            ("x0", x0), ("y0", y0),
            ("u0", u0), ("v0", v0),
        )
        for attr_name, value in float_params:
            setattr(self, attr_name, np.float32(value))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
123
SWESimulators/DataOutput.py
Normal file
123
SWESimulators/DataOutput.py
Normal file
@@ -0,0 +1,123 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements saving shallow water simulations to a
|
||||
netcdf file.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from netCDF4 import Dataset
|
||||
|
||||
class CTCSNetCDFWriter:
    """
    Writes eta, u and v fields of a staggered-grid simulation to a netCDF file.

    Can be used as a context manager; the file is closed on exit.
    """

    #Names of the spatial axes, in the order they are created in the file
    _AXES = ('x_eta', 'y_eta', 'x_u', 'y_u', 'x_v', 'y_v')

    def __init__(self, outfilename, nx, ny, dx, dy, ignore_ghostcells=True):
        """
        Creates the output file with its dimensions, axes and data variables

        outfilename: Path of the netCDF file to create (opened with mode 'w')
        nx, ny: Number of cells along x and y
        dx, dy: Grid cell spacing along x and y
        ignore_ghostcells: If true, only the values inside the outermost
                           row/column of each array are stored
        """
        self.ncfile = Dataset(outfilename,'w')
        self.ignore_ghostcells = ignore_ghostcells

        ticks = self._axis_ticks(nx, ny, dx, dy, self.ignore_ghostcells)

        #Create dimensions
        self.ncfile.createDimension('time', None) #Unlimited time dimension
        for name in self._AXES:
            self.ncfile.createDimension(name, len(ticks[name]))

        #Create axis
        float_type = np.dtype('float32').char
        self.nc_time = self.ncfile.createVariable('time', float_type, 'time')
        axes = [self.ncfile.createVariable(name, float_type, name) for name in self._AXES]

        #Set axis values/ticks
        for name, axis in zip(self._AXES, axes):
            axis[:] = ticks[name]

        #Set units
        self.nc_time.units = 's'
        for axis in axes:
            axis.units = 'm'

        #Create output data variables
        self.nc_eta = self.ncfile.createVariable('eta', float_type, ('time', 'y_eta', 'x_eta'))
        self.nc_u = self.ncfile.createVariable('u', float_type, ('time', 'y_u', 'x_u'))
        self.nc_v = self.ncfile.createVariable('v', float_type, ('time', 'y_v', 'x_v'))

        #Set units
        #NOTE(review): u and v are labelled 'm' like eta - confirm this is the intended unit
        self.nc_eta.units = 'm'
        self.nc_u.units = 'm'
        self.nc_v.units = 'm'

    @staticmethod
    def _axis_ticks(nx, ny, dx, dy, ignore_ghostcells):
        """
        Returns a dict mapping each axis name to its coordinate values

        Cell centers lie at odd multiples of dx/2 (dy/2), faces at multiples
        of dx (dy). Without ghost cells only the interior faces dx..(nx-1)*dx
        remain; these previously started at the literal 1 instead of dx (dy).
        """
        if (ignore_ghostcells):
            x_center = np.linspace(dx/2.0, nx*dx - dx/2.0, nx)
            y_center = np.linspace(dy/2.0, ny*dy - dy/2.0, ny)
            x_face = np.linspace(dx, (nx-1)*dx, nx-1)
            y_face = np.linspace(dy, (ny-1)*dy, ny-1)
        else:
            x_center = np.linspace(-dx/2.0, nx*dx + dx/2.0, nx+2)
            y_center = np.linspace(-dy/2.0, ny*dy + dy/2.0, ny+2)
            x_face = np.linspace(0, nx*dx, nx+1)
            y_face = np.linspace(0, ny*dy, ny+1)

        return {
            'x_eta': x_center, 'y_eta': y_center,
            'x_u': x_face, 'y_u': y_center,
            'x_v': x_center, 'y_v': y_face,
        }

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        #Parenthesised single argument prints the same under Python 2 and 3
        print("Closing '" + self.ncfile.filepath() + "'")
        self.ncfile.close()

    def write(self, i, t, eta, u, v):
        """
        Stores one snapshot at time index i

        i: Index along the time dimension
        t: Simulation time of the snapshot
        eta, u, v: 2D arrays; their outermost rows and columns are dropped
                   when ignore_ghostcells is set
        """
        self.nc_time[i] = t
        if (self.ignore_ghostcells):
            eta = eta[1:-1, 1:-1]
            u = u[1:-1, 1:-1]
            v = v[1:-1, 1:-1]
        self.nc_eta[i, :] = eta
        self.nc_u[i, :] = u
        self.nc_v[i, :] = v
|
||||
184
SWESimulators/FBL.py
Normal file
184
SWESimulators/FBL.py
Normal file
@@ -0,0 +1,184 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the Forward Backward Linear numerical
|
||||
scheme for the shallow water equations, described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||
"""
|
||||
class FBL:
    """
    Forward-Backward Linear shallow water solver running on an OpenCL device.

    Each timestep launches the U kernel, then the V kernel, then the eta
    kernel, all operating in place on the same device arrays.
    """

    def __init__(self,
                 cl_ctx,
                 H, eta0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r,
                 wind_stress=Common.WindStressParams(),
                 block_width=16, block_height=16):
        """
        Initialization routine
        cl_ctx: OpenCL context to run on
        H: Water depth, array of shape (ny, nx) (this scheme uses no ghost cells)
        eta0: Initial deviation from mean sea level, shape (ny, nx)
        hu0: Initial momentum along x-axis, shape (ny, nx+1)
        hv0: Initial momentum along y-axis, shape (ny+1, nx)
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1)
        r: Bottom friction coefficient (2.4e-3 m/s)
        wind_stress: Wind stress parameters
        block_width, block_height: Work-group size; also compiled into the kernels
        """
        self.cl_ctx = cl_ctx

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.u_kernel = Common.get_kernel(self.cl_ctx, "FBL_U_kernel.opencl", block_width, block_height)
        self.v_kernel = Common.get_kernel(self.cl_ctx, "FBL_V_kernel.opencl", block_width, block_height)
        self.eta_kernel = Common.get_kernel(self.cl_ctx, "FBL_eta_kernel.opencl", block_width, block_height)

        #Create data by uploading to device
        ghost_cells_x = 0
        ghost_cells_y = 0
        self.H = Common.OpenCLArray2D(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, H)
        self.cl_data = Common.SWEDataArkawaC(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0)

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.f = np.float32(f)
        self.r = np.float32(r)
        self.wind_stress = wind_stress

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        #The work-group size must match the block_width/height the kernels were
        #compiled with, so derive it from them instead of hard-coding (8, 8)
        self.local_size = (block_width, block_height)
        #NOTE(review): the grid covers nx*ny threads while hu has nx+1 columns and
        #hv has ny+1 rows; confirm the last face is meant to be left untouched
        #when nx (ny) is a multiple of the work-group size
        self.global_size = (
                       int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]),
                       int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1])
                      )

    def step(self, t_end=0.0):
        """
        Advances the simulation by t_end seconds

        t_end is measured from the current state, not from t=0. Full steps of
        size dt are taken, followed by one shorter step if t_end is not a
        multiple of dt. Returns the accumulated simulation time.
        """
        n = int(t_end / self.dt + 1)

        for i in range(0, n):
            local_dt = np.float32(min(self.dt, t_end-i*self.dt))

            if (local_dt <= 0.0):
                break

            #Arguments shared by all three kernels
            common_args = (
                self.nx, self.ny,
                self.dx, self.dy, local_dt,
                self.g, self.f, self.r,
                self.H.data, self.H.pitch,
                self.cl_data.hu0.data, self.cl_data.hu0.pitch,
                self.cl_data.hv0.data, self.cl_data.hv0.pitch,
                self.cl_data.h0.data, self.cl_data.h0.pitch)

            #Additional arguments of the two momentum kernels
            wind_args = (
                self.wind_stress.type,
                self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc,
                self.wind_stress.x0, self.wind_stress.y0,
                self.wind_stress.u0, self.wind_stress.v0,
                self.t)

            self.u_kernel.computeUKernel(self.cl_queue, self.global_size, self.local_size,
                    *(common_args + wind_args))

            self.v_kernel.computeVKernel(self.cl_queue, self.global_size, self.local_size,
                    *(common_args + wind_args))

            self.eta_kernel.computeEtaKernel(self.cl_queue, self.global_size, self.local_size,
                    *common_args)

            self.t += local_dt

        return self.t

    def download(self):
        """
        Copies the current state from the device and returns it as (eta, hu, hv)
        """
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
163
SWESimulators/FBL_U_kernel.opencl
Normal file
163
SWESimulators/FBL_U_kernel.opencl
Normal file
@@ -0,0 +1,163 @@
|
||||
/*
|
||||
This OpenCL kernel implements part of the Forward Backward Linear
|
||||
numerical scheme for the shallow water equations, described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
/**
 * Kernel that evolves U one step in time.
 *
 * For the face between cells ti-1 and ti in row tj it computes
 *
 *   U_next = B * (U + dt*(f*V_m + P + X))
 *
 * where H_m and V_m are H and V averaged to the U position,
 * B = H_m/(H_m + r*dt) is the friction factor,
 * P = g*H_m*(eta[ti-1] - eta[ti])/dx is the pressure term and X is the wind
 * stress returned by windStressX(). U is updated in place; the faces at
 * ti == 0 and ti == nx_ are set to zero (closed boundaries).
 *
 * Array extents used by the guards below: U has ny_ rows of nx_+1 values,
 * V has ny_+1 rows of nx_ values, H and eta have ny_ rows of nx_ values.
 * All pitches are given in bytes.
 */
__kernel void computeUKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* U_ptr_, int U_pitch_,
        __global float* V_ptr_, int V_pitch_,
        __global float* eta_ptr_, int eta_pitch_,

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    __local float H_shared[block_height][block_width+1];
    __local float V_shared[block_height+1][block_width+1];
    __local float eta_shared[block_height][block_width+1];

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Index of block within domain
    const int bx = get_local_size(0) * get_group_id(0);
    const int by = get_local_size(1) * get_group_id(1);

    //Index of cell within domain
    const int ti = get_global_id(0);
    const int tj = get_global_id(1);

    //Compute pointer to row "tj" in the U array
    __global float* const U_row = (__global float*) ((__global char*) U_ptr_ + U_pitch_*tj);

    //Read current U
    float U_current = 0.0f;
    if (ti < nx_ + 1 && tj < ny_) {
        U_current = U_row[ti];
    }

    //Read H and eta into local memory; tile column i holds global column bx+i-1
    for (int j=ty; j<block_height; j+=get_local_size(1)) {
        const int l = by + j;

        //Compute the pointer to row "l" in the H and eta arrays
        __global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
        __global float* const eta_row = (__global float*) ((__global char*) eta_ptr_ + eta_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const int k = bx + i - 1;

            //H and eta only have ny_ rows, so row ny_ must not be read
            //(the previous guard l < ny_+1 allowed one row past the end)
            if (k >= 0 && k < nx_ && l < ny_) {
                H_shared[j][i] = H_row[k];
                eta_shared[j][i] = eta_row[k];
            }
            else {
                H_shared[j][i] = 0.0f;
                eta_shared[j][i] = 0.0f;
            }
        }
    }

    //Read V into shared memory; V has one row more than H and eta
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const int l = by + j;

        //Compute the pointer to row "l" in the V array
        __global float* const V_row = (__global float*) ((__global char*) V_ptr_ + V_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const int k = bx + i - 1;

            if (k >= 0 && k < nx_ && l < ny_+1) {
                V_shared[j][i] = V_row[k];
            }
            else {
                V_shared[j][i] = 0.0f;
            }
        }
    }

    //Make sure all threads have read into shared mem
    barrier(CLK_LOCAL_MEM_FENCE);

    //Reconstruct H at the U position
    float H_m = 0.5f*(H_shared[ty][tx] + H_shared[ty][tx+1]);

    //Reconstruct V at the U position
    float V_m = 0.0f;
    if (tj==0) {
        //First row: only the two V values above contribute
        V_m = 0.5f*(V_shared[ty+1][tx] + V_shared[ty+1][tx+1]);
    }
    else if (tj==ny_-1) {
        //Last row: only the two V values below contribute
        V_m = 0.5f*(V_shared[ty][tx] + V_shared[ty][tx+1]);
    }
    else {
        V_m = 0.25f*(V_shared[ty][tx] + V_shared[ty][tx+1]
                + V_shared[ty+1][tx] + V_shared[ty+1][tx+1]);
    }

    //Calculate the friction coefficient
    float B = H_m/(H_m + r_*dt_);

    //Calculate the gravitational effect
    float P = g_*H_m*(eta_shared[ty][tx] - eta_shared[ty][tx+1])/dx_;

    //Calculate the wind shear stress
    float X = windStressX(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute the U at the next timestep
    float U_next = B*(U_current + dt_*(f_*V_m + P + X) );

    //Write to main memory for all U values inside the array
    if (ti < nx_+1 && tj < ny_) {
        //Closed boundaries
        if (ti == 0 || ti == nx_) {
            U_next = 0.0f;
        }
        U_row[ti] = U_next;
    }
}
|
||||
168
SWESimulators/FBL_V_kernel.opencl
Normal file
168
SWESimulators/FBL_V_kernel.opencl
Normal file
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
This OpenCL kernel implements part of the Forward Backward Linear
|
||||
numerical scheme for the shallow water equations, described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Kernel that evolves V one step in time.
  *
  * Each work-item updates one V value, located on the cell face between
  * row tj-1 and row tj of H/eta. The work-group first stages H, eta and U
  * in local memory; every work-item then combines bottom friction, the
  * pressure gradient, the Coriolis term and the wind stress into the new V.
  *
  * Array extents, as used by the bounds checks in this kernel:
  *   U: (nx_+1) x ny_,  V: nx_ x (ny_+1).
  * H and eta are treated as nx_ x ny_ (this matches how computeEtaKernel
  * updates eta); rows outside that range are padded with zero.
  * All *_pitch_ arguments are row strides in bytes.
  *
  * block_width, block_height and windStressY() are not defined in this file
  * (presumably supplied by the build options / "common.opencl").
  * NOTE(review): the local-memory indexing below assumes the work-group size
  * does not exceed block_width x block_height - confirm against the host code.
  */
__kernel void computeVKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* U_ptr_, int U_pitch_,
        __global float* V_ptr_, int V_pitch_,
        __global float* eta_ptr_, int eta_pitch_,

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    __local float H_shared[block_height+1][block_width];
    __local float U_shared[block_height+1][block_width+1];
    __local float eta_shared[block_height+1][block_width];

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Index of block within domain
    const int bx = get_local_size(0) * get_group_id(0);
    const int by = get_local_size(1) * get_group_id(1);

    //Index of cell within domain
    const int ti = get_global_id(0);
    const int tj = get_global_id(1);

    //Compute pointer to current row in the V array
    __global float* const V_row = (__global float*) ((__global char*) V_ptr_ + V_pitch_*tj);

    //Read current V
    float V_current = 0.0f;
    if (ti < nx_ && tj < ny_+1) {
        V_current = V_row[ti];
    }

    //Read H and eta into shared memory
    //Shared row j holds global row by+j-1, i.e., one extra row below the block
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const int l = by + j - 1;

        //Compute the pointer to current row in the H and eta arrays
        __global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
        __global float* const eta_row = (__global float*) ((__global char*) eta_ptr_ + eta_pitch_*l);

        for (int i=tx; i<block_width; i+=get_local_size(0)) {
            const int k = bx + i;

            //H and eta have ny_ rows: row ny_ must not be read. It is only
            //needed for V at tj == ny_, which is forced to zero below.
            if (k < nx_ && l >= 0 && l < ny_) {
                H_shared[j][i] = H_row[k];
                eta_shared[j][i] = eta_row[k];
            }
            else {
                H_shared[j][i] = 0.0f;
                eta_shared[j][i] = 0.0f;
            }
        }
    }

    //Read U into shared memory
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const int l = by + j - 1;

        //Compute the pointer to current row in the U array
        __global float* const U_row = (__global float*) ((__global char*) U_ptr_ + U_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const int k = bx + i;

            if (k < nx_+1 && l >= 0 && l < ny_) {
                U_shared[j][i] = U_row[k];
            }
            else {
                U_shared[j][i] = 0.0f;
            }
        }
    }

    //Make sure all threads have read into shared mem
    barrier(CLK_LOCAL_MEM_FENCE);

    //Reconstruct H at the V position
    float H_m = 0.5f*(H_shared[ty][tx] + H_shared[ty+1][tx]);

    //Reconstruct U at the V position
    //In the first and last column only the two U values on one side are averaged
    float U_m;
    if (ti==0) {
        U_m = 0.5f*(U_shared[ty][tx+1] + U_shared[ty+1][tx+1]);
    }
    else if (ti==nx_-1) {
        U_m = 0.5f*(U_shared[ty][tx] + U_shared[ty+1][tx]);
    }
    else {
        U_m = 0.25f*(U_shared[ty][tx] + U_shared[ty][tx+1]
                + U_shared[ty+1][tx] + U_shared[ty+1][tx+1]);
    }

    //Calculate the friction coefficient
    float B = H_m/(H_m + r_*dt_);

    //Calculate the gravitational effect
    float P = g_*H_m*(eta_shared[ty][tx] - eta_shared[ty+1][tx])/dy_;

    //Calculate the wind shear stress
    float Y = windStressY(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute the V at the next timestep
    float V_next = B*(V_current + dt_*(-f_*U_m + P + Y) );

    //Write to main memory
    if (ti < nx_ && tj < ny_+1) {
        //Closed boundaries: no flow through the first and last row of faces
        if (tj == 0 || tj == ny_) {
            V_next = 0.0f;
        }

        V_row[ti] = V_next;
    }
}
|
||||
113
SWESimulators/FBL_eta_kernel.opencl
Normal file
113
SWESimulators/FBL_eta_kernel.opencl
Normal file
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
This OpenCL kernel implements part of the Forward Backward Linear
|
||||
numerical scheme for the shallow water equations, described in
|
||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
||||
predictions", Met no report 2012/3 and 2012/5 .
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Kernel that evolves eta one step in time.
  *
  * Each work-item updates one eta value from the difference of the U values
  * on its left/right faces and the V values on its lower/upper faces.
  * The bounds checks treat eta as nx_ x ny_, U as (nx_+1) x ny_ and
  * V as nx_ x (ny_+1); the *_pitch_ arguments are row strides in bytes.
  *
  * g_, f_, r_, H_ptr_ and H_pitch_ are accepted but not used by this kernel.
  * block_width and block_height are not defined in this file (presumably
  * supplied as build options).
  */
__kernel void computeEtaKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* U_ptr_, int U_pitch_,
        __global float* V_ptr_, int V_pitch_,
        __global float* eta_ptr_, int eta_pitch_) {
    //Position of this work-item inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Origin of this work-group inside the domain
    const int bx = get_local_size(0) * get_group_id(0);
    const int by = get_local_size(1) * get_group_id(1);

    //Position of this work-item inside the domain
    const int ti = get_global_id(0);
    const int tj = get_global_id(1);

    __local float U_shared[block_height][block_width+1];
    __local float V_shared[block_height+1][block_width];

    //Pointer to this work-item's row in the eta array
    __global float* const eta_row = (__global float*) ((__global char*) eta_ptr_ + eta_pitch_*tj);

    //Fetch the current eta (zero for work-items outside the domain)
    const bool inside_domain = (ti < nx_ && tj < ny_);
    const float eta_current = inside_domain ? eta_row[ti] : 0.0f;

    //Stage U in shared memory: one more column than the block is wide
    for (int row=ty; row<block_height; row+=get_local_size(1)) {
        const unsigned int global_row = by + row;

        //Pointer to the matching row in the U array
        __global float* const U_row = (__global float*) ((__global char*) U_ptr_ + U_pitch_*global_row);

        for (int col=tx; col<block_width+1; col+=get_local_size(0)) {
            const unsigned int global_col = bx + col;
            U_shared[row][col] = (global_col < nx_+1 && global_row < ny_) ? U_row[global_col] : 0.0f;
        }
    }

    //Stage V in shared memory: one more row than the block is high
    for (int row=ty; row<block_height+1; row+=get_local_size(1)) {
        const unsigned int global_row = by + row;

        //Pointer to the matching row in the V array
        __global float* const V_row = (__global float*) ((__global char*) V_ptr_ + V_pitch_*global_row);

        for (int col=tx; col<block_width; col+=get_local_size(0)) {
            const unsigned int global_col = bx + col;
            V_shared[row][col] = (global_col < nx_ && global_row < ny_+1) ? V_row[global_col] : 0.0f;
        }
    }

    //All loads must be complete before neighbouring values are used
    barrier(CLK_LOCAL_MEM_FENCE);

    //Change in eta caused by the flux differences along x and along y
    const float x_term = dt_/dx_ * (U_shared[ty][tx+1] - U_shared[ty][tx]);
    const float y_term = dt_/dy_ * (V_shared[ty+1][tx] - V_shared[ty][tx]);

    //Eta at the next timestep
    const float eta_next = eta_current - x_term - y_term;

    //Store the result for cells inside the domain
    if (inside_domain) {
        eta_row[ti] = eta_next;
    }
}
|
||||
133
SWESimulators/FORCE.py
Normal file
133
SWESimulators/FORCE.py
Normal file
@@ -0,0 +1,133 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the FORCE flux
|
||||
for the shallow water equations
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations
|
||||
"""
|
||||
class FORCE:
    """
    Shallow water simulator that launches the swe_2D kernel from
    "FORCE_kernel.opencl" on an OpenCL device.

    Constructor arguments
    cl_ctx: OpenCL context used for the command queue, kernel and data
    h0: Water depth incl ghost cells
    hu0: Initial momentum along x-axis incl ghost cells
    hv0: Initial momentum along y-axis incl ghost cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    block_width, block_height: OpenCL work-group size

    NOTE(review): the original description gave the input arrays as
    (nx+1)*(ny+1) cells, while one ghost cell per side is requested
    below - verify the expected shape against Common.SWEDataArkawaA.
    """

    def __init__(self, cl_ctx, h0, hu0, hv0, nx, ny, dx, dy, dt, g,
                 block_width=16, block_height=16):
        #Number of ghost cells requested on each side of the domain
        ghost_cells_x = 1
        ghost_cells_y = 1

        #OpenCL context, command queue and compiled kernel
        self.cl_ctx = cl_ctx
        self.cl_queue = cl.CommandQueue(self.cl_ctx)
        self.kernel = Common.get_kernel(self.cl_ctx, "FORCE_kernel.opencl", block_width, block_height)

        #Upload the initial conditions to the device
        self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)

        #Kernel arguments must have the exact width the OpenCL kernel expects
        self.nx, self.ny = np.int32(nx), np.int32(ny)
        self.dx, self.dy, self.dt = np.float32(dx), np.float32(dy), np.float32(dt)
        self.g = np.float32(g)

        #Simulation time starts at zero
        self.t = np.float32(0.0)

        #Launch configuration: round the domain up to whole work-groups
        self.local_size = (block_width, block_height)
        self.global_size = tuple(
            int(np.ceil(cells / float(block)) * block)
            for cells, block in zip((self.nx, self.ny), self.local_size))

    def step(self, t_end=0.0):
        """
        Advance the simulation by t_end, using timesteps of at most self.dt
        (the last one is shortened to land exactly on t_end).
        Returns the accumulated simulation time self.t.
        """
        max_steps = int(t_end / self.dt + 1)

        for step_index in range(max_steps):
            time_left = t_end - step_index*self.dt
            local_dt = np.float32(min(self.dt, time_left))

            #Nothing left to simulate
            if local_dt <= 0.0:
                break

            #(data, pitch) pairs: current state first, then the output buffers
            buffer_args = []
            for name in ("h0", "hu0", "hv0", "h1", "hu1", "hv1"):
                array = getattr(self.cl_data, name)
                buffer_args.extend((array.data, array.pitch))

            self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size,
                               self.nx, self.ny,
                               self.dx, self.dy, local_dt,
                               self.g,
                               *buffer_args)

            self.t += local_dt

            #The freshly written buffers become the input of the next step
            self.cl_data.swap()

        return self.t

    def download(self):
        """Return whatever self.cl_data.download yields for our command queue."""
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
168
SWESimulators/FORCE_kernel.opencl
Normal file
168
SWESimulators/FORCE_kernel.opencl
Normal file
@@ -0,0 +1,168 @@
|
||||
/*
|
||||
This OpenCL kernel implements the FORCE (first-order centred) scheme
|
||||
for the shallow water equations, with edge fluxes.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
/**
  * Computes the flux along the x axis for all faces.
  *
  * Q holds the block including one ghost cell on every side; F receives one
  * flux per vertical face (block_width+1 per row) for the block_height
  * interior rows. Ends with a local-memory barrier, so every work-item of
  * the work-group must call this function.
  */
void computeFluxF(__local float Q[3][block_height+2][block_width+2],
                  __local float F[3][block_height+1][block_width+1],
                  const float g_, const float dx_, const float dt_) {
    //Position and loop strides of this work-item within the work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);
    const int stride_x = get_local_size(0);
    const int stride_y = get_local_size(1);

    for (int row=ty; row<block_height; row+=stride_y) {
        const int q_row = row + 1; //Offset past the ghost row

        for (int face=tx; face<block_width+1; face+=stride_x) {
            //State on the left (minus) and right (plus) side of the face
            const float3 Q_minus = (float3)(Q[0][q_row][face],
                                            Q[1][q_row][face],
                                            Q[2][q_row][face]);
            const float3 Q_plus = (float3)(Q[0][q_row][face+1],
                                           Q[1][q_row][face+1],
                                           Q[2][q_row][face+1]);

            //Flux through the face
            const float3 face_flux = FORCE_1D_flux(Q_minus, Q_plus, g_, dx_, dt_);
            F[0][row][face] = face_flux.x;
            F[1][row][face] = face_flux.y;
            F[2][row][face] = face_flux.z;
        }
    }

    //All fluxes must be written before anyone reads them
    barrier(CLK_LOCAL_MEM_FENCE);
}
|
||||
|
||||
|
||||
/**
  * Computes the flux along the y axis for all faces.
  *
  * The 1D flux function is reused by swapping hu and hv on the way in and
  * swapping the corresponding flux components back on the way out.
  * Ends with a local-memory barrier, so every work-item of the work-group
  * must call this function.
  */
void computeFluxG(__local float Q[3][block_height+2][block_width+2],
                  __local float G[3][block_height+1][block_width+1],
                  const float g_, const float dy_, const float dt_) {
    //Position and loop strides of this work-item within the work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);
    const int stride_x = get_local_size(0);
    const int stride_y = get_local_size(1);

    for (int face=ty; face<block_height+1; face+=stride_y) {
        for (int col=tx; col<block_width; col+=stride_x) {
            const int q_col = col + 1; //Offset past the ghost column

            //State below (minus) and above (plus) the face, with hu and hv swapped
            const float3 Q_minus = (float3)(Q[0][face][q_col],
                                            Q[2][face][q_col],
                                            Q[1][face][q_col]);
            const float3 Q_plus = (float3)(Q[0][face+1][q_col],
                                           Q[2][face+1][q_col],
                                           Q[1][face+1][q_col]);

            //Flux through the face; y and z are swapped back when stored
            const float3 face_flux = FORCE_1D_flux(Q_minus, Q_plus, g_, dy_, dt_);
            G[0][face][col] = face_flux.x;
            G[1][face][col] = face_flux.z;
            G[2][face][col] = face_flux.y;
        }
    }

    //All fluxes must be written before anyone reads them
    barrier(CLK_LOCAL_MEM_FENCE);
}
|
||||
|
||||
|
||||
/**
  * Advances the shallow water state one timestep of size dt_ by a flux
  * sweep along x followed by a flux sweep along y.
  *
  * The state is read from the *0 buffers and the result is written to the
  * *1 buffers; the *_pitch_ arguments are forwarded to the block read/write
  * helpers together with the matching pointers.
  *
  * readBlock1, noFlowBoundary1, evolveF1, evolveG1 and writeBlock1 are not
  * defined in this file (presumably provided by "common.opencl").
  */
__kernel void swe_2D(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,

        //Input h^n
        __global float* h0_ptr_, int h0_pitch_,
        __global float* hu0_ptr_, int hu0_pitch_,
        __global float* hv0_ptr_, int hv0_pitch_,

        //Output h^{n+1}
        __global float* h1_ptr_, int h1_pitch_,
        __global float* hu1_ptr_, int hu1_pitch_,
        __global float* hv1_ptr_, int hv1_pitch_) {

    //Shared storage: the block with one ghost cell per side, and the face
    //fluxes (the same array is reused for the x and the y sweep)
    __local float Q[3][block_height+2][block_width+2];
    __local float F[3][block_height+1][block_width+1];

    //Read into shared memory
    readBlock1(h0_ptr_, h0_pitch_,
               hu0_ptr_, hu0_pitch_,
               hv0_ptr_, hv0_pitch_,
               Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Set boundary conditions
    noFlowBoundary1(Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Compute flux along x, and evolve
    computeFluxF(Q, F, g_, dx_, dt_);
    barrier(CLK_LOCAL_MEM_FENCE);
    evolveF1(Q, F, nx_, ny_, dx_, dt_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Set boundary conditions again, since the x sweep changed Q
    noFlowBoundary1(Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Compute flux along y, and evolve
    computeFluxG(Q, F, g_, dy_, dt_);
    barrier(CLK_LOCAL_MEM_FENCE);
    evolveG1(Q, F, nx_, ny_, dy_, dt_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Write to main memory
    writeBlock1(h1_ptr_, h1_pitch_,
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
}
|
||||
129
SWESimulators/HLL.py
Normal file
129
SWESimulators/HLL.py
Normal file
@@ -0,0 +1,129 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the HLL flux
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the Harten-Lax-van Leer approximate Riemann solver
|
||||
"""
|
||||
class HLL:
    """
    Shallow water simulator that launches the swe_2D kernel from
    "HLL_kernel.opencl" on an OpenCL device.

    Constructor arguments
    cl_ctx: OpenCL context used for the command queue, kernel and data
    h0: Water depth incl ghost cells
    u0: Initial momentum along x-axis incl ghost cells
    v0: Initial momentum along y-axis incl ghost cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    block_width, block_height: OpenCL work-group size

    NOTE(review): the original description gave the input arrays as
    (nx+1)*(ny+1) cells, while one ghost cell per side is requested
    below - verify the expected shape against Common.SWEDataArkawaA.
    """

    def __init__(self, cl_ctx, h0, u0, v0, nx, ny, dx, dy, dt, g,
                 block_width=16, block_height=16):
        #Number of ghost cells requested on each side of the domain
        ghost_cells_x = 1
        ghost_cells_y = 1

        #OpenCL context, command queue and compiled kernel
        #(the attribute keeps its historical name lxf_kernel)
        self.cl_ctx = cl_ctx
        self.cl_queue = cl.CommandQueue(self.cl_ctx)
        self.lxf_kernel = Common.get_kernel(self.cl_ctx, "HLL_kernel.opencl", block_width, block_height)

        #Upload the initial conditions to the device
        self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, u0, v0)

        #Kernel arguments must have the exact width the OpenCL kernel expects
        self.nx, self.ny = np.int32(nx), np.int32(ny)
        self.dx, self.dy, self.dt = np.float32(dx), np.float32(dy), np.float32(dt)
        self.g = np.float32(g)

        #Simulation time starts at zero
        self.t = np.float32(0.0)

        #Launch configuration: round the domain up to whole work-groups
        self.local_size = (block_width, block_height)
        self.global_size = tuple(
            int(np.ceil(cells / float(block)) * block)
            for cells, block in zip((self.nx, self.ny), self.local_size))

    def step(self, t_end=0.0):
        """
        Advance the simulation by t_end, using timesteps of at most self.dt
        (the last one is shortened to land exactly on t_end).
        Returns the accumulated simulation time self.t.
        """
        max_steps = int(t_end / self.dt + 1)

        for step_index in range(max_steps):
            time_left = t_end - step_index*self.dt
            local_dt = np.float32(min(self.dt, time_left))

            #Nothing left to simulate
            if local_dt <= 0.0:
                break

            #(data, pitch) pairs: current state first, then the output buffers
            buffer_args = []
            for name in ("h0", "hu0", "hv0", "h1", "hu1", "hv1"):
                array = getattr(self.cl_data, name)
                buffer_args.extend((array.data, array.pitch))

            self.lxf_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size,
                                   self.nx, self.ny,
                                   self.dx, self.dy, local_dt,
                                   self.g,
                                   *buffer_args)

            self.t += local_dt

            #The freshly written buffers become the input of the next step
            self.cl_data.swap()

        return self.t

    def download(self):
        """Return whatever self.cl_data.download yields for our command queue."""
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
150
SWESimulators/HLL2.py
Normal file
150
SWESimulators/HLL2.py
Normal file
@@ -0,0 +1,150 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the 2nd order HLL flux
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the 2nd order HLL flux
|
||||
"""
|
||||
class HLL2:
    """
    Shallow water simulator that launches the swe_2D kernel from
    "HLL2_kernel.opencl" on an OpenCL device.

    Constructor arguments
    cl_ctx: OpenCL context used for the command queue, kernel and data
    h0: Water depth incl ghost cells
    hu0: Initial momentum along x-axis incl ghost cells
    hv0: Initial momentum along y-axis incl ghost cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    theta: Passed unchanged to the kernel (presumably the slope limiter
           parameter - confirm against HLL2_kernel.opencl)
    block_width, block_height: OpenCL work-group size

    NOTE(review): the original description gave the input arrays as
    (nx+1)*(ny+1) cells, while two ghost cells per side are requested
    below - verify the expected shape against Common.SWEDataArkawaA.
    """

    def __init__(self, \
                 cl_ctx, \
                 h0, hu0, hv0, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, \
                 theta=1.8, \
                 block_width=16, block_height=16):
        self.cl_ctx = cl_ctx

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.swe_kernel = Common.get_kernel(self.cl_ctx, "HLL2_kernel.opencl", block_width, block_height)

        #Create data by uploading to device
        ghost_cells_x = 2
        ghost_cells_y = 2
        self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.theta = np.float32(theta)

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        #(the domain is rounded up to a whole number of work-groups)
        self.local_size = (block_width, block_height)
        self.global_size = ( \
                       int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
                       int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
                      )

    def step(self, t_end=0.0):
        """
        Advance the simulation by t_end and return the accumulated
        simulation time self.t.

        Timesteps are run in pairs with alternating sweep order, so each
        loop iteration covers 2*local_dt of simulated time.
        """
        n = int(t_end / (2.0*self.dt) + 1)

        for i in range(0, n):
            #Dimensional splitting: second order accurate for every other timestep,
            #thus run two timesteps in a go

            #Each of the two timesteps gets half of what this iteration covers
            local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
            if (local_dt <= 0.0):
                break

            #Sweep order 0: along X, then Y. Sweep order 1: along Y, then X
            for sweep_order in (0, 1):
                self.swe_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
                        self.nx, self.ny, \
                        self.dx, self.dy, local_dt, \
                        self.g, \
                        self.theta, \
                        np.int32(sweep_order), \
                        self.cl_data.h0.data, self.cl_data.h0.pitch, \
                        self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
                        self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
                        self.cl_data.h1.data, self.cl_data.h1.pitch, \
                        self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
                        self.cl_data.hv1.data, self.cl_data.hv1.pitch)
                self.cl_data.swap()

                #Every kernel launch advances the solution by local_dt; the
                #time is added per launch so that self.t matches the
                #simulated time and stays a float32
                self.t += local_dt

        return self.t

    def download(self):
        """Return whatever self.cl_data.download yields for our command queue."""
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
225
SWESimulators/HLL2_kernel.opencl
Normal file
225
SWESimulators/HLL2_kernel.opencl
Normal file
@@ -0,0 +1,225 @@
|
||||
/*
|
||||
This OpenCL kernel implements the second order HLL flux
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Computes the flux along the x axis for all faces.
  *
  * For every vertical face the cell averages in Q and the slopes in Qx of
  * the two adjacent cells are used to reconstruct the values at the cell
  * edges; these are evolved half a timestep and passed to HLL_flux.
  * F_func and HLL_flux are not defined in this file (presumably provided
  * by "common.opencl"). No barrier is issued here.
  */
void computeFluxF(__local float Q[3][block_height+4][block_width+4],
                  __local float Qx[3][block_height+2][block_width+2],
                  __local float F[3][block_height+1][block_width+1],
                  const float g_, const float dx_, const float dt_) {
    //Position and loop strides of this work-item within the work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);
    const int stride_x = get_local_size(0);
    const int stride_y = get_local_size(1);

    //Scaling of the flux difference in the half-timestep predictor
    const float half_dt_dx = dt_/(2.0f*dx_);

    for (int j=ty; j<block_height; j+=stride_y) {
        const int q_row = j + 2; //Skip ghost cells

        for (int i=tx; i<block_width+1; i+=stride_x) {
            const int q_col = i + 1;

            //Cell averages of the left (i) and right (i+1) cell
            const float3 avg_l = (float3)(Q[0][q_row][q_col],
                                          Q[1][q_row][q_col],
                                          Q[2][q_row][q_col]);
            const float3 avg_r = (float3)(Q[0][q_row][q_col+1],
                                          Q[1][q_row][q_col+1],
                                          Q[2][q_row][q_col+1]);

            //Slopes of the left and right cell
            const float3 slope_l = (float3)(Qx[0][j][i],
                                            Qx[1][j][i],
                                            Qx[2][j][i]);
            const float3 slope_r = (float3)(Qx[0][j][i+1],
                                            Qx[1][j][i+1],
                                            Qx[2][j][i+1]);

            //Point values at the left and right hand side of both cells
            const float3 Q_rl = avg_r - 0.5f*slope_r;
            const float3 Q_rr = avg_r + 0.5f*slope_r;
            const float3 Q_ll = avg_l - 0.5f*slope_l;
            const float3 Q_lr = avg_l + 0.5f*slope_l;

            //Evolve half a timestep (predictor step)
            const float3 Q_r_bar = Q_rl + half_dt_dx * (F_func(Q_rl, g_) - F_func(Q_rr, g_));
            const float3 Q_l_bar = Q_lr + half_dt_dx * (F_func(Q_ll, g_) - F_func(Q_lr, g_));

            //Flux through the face, based on the predicted values
            const float3 face_flux = HLL_flux(Q_l_bar, Q_r_bar, g_);

            //Store in shared memory
            F[0][j][i] = face_flux.x;
            F[1][j][i] = face_flux.y;
            F[2][j][i] = face_flux.z;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Computes the flux along the y axis for all faces.
  *
  * Same procedure as for the x axis, but hu and hv are swapped
  * ("transposing" the domain) on the way in, and the corresponding flux
  * components are swapped back on the way out.
  * F_func and HLL_flux are not defined in this file (presumably provided
  * by "common.opencl"). No barrier is issued here.
  */
void computeFluxG(__local float Q[3][block_height+4][block_width+4],
                  __local float Qy[3][block_height+2][block_width+2],
                  __local float G[3][block_height+1][block_width+1],
                  const float g_, const float dy_, const float dt_) {
    //Position and loop strides of this work-item within the work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);
    const int stride_x = get_local_size(0);
    const int stride_y = get_local_size(1);

    //Scaling of the flux difference in the half-timestep predictor
    const float half_dt_dy = dt_/(2.0f*dy_);

    for (int j=ty; j<block_height+1; j+=stride_y) {
        const int q_row = j + 1;

        for (int i=tx; i<block_width; i+=stride_x) {
            const int q_col = i + 2; //Skip ghost cells

            //Cell averages of the lower (j) and upper (j+1) cell, hu and hv swapped
            const float3 avg_l = (float3)(Q[0][q_row][q_col],
                                          Q[2][q_row][q_col],
                                          Q[1][q_row][q_col]);
            const float3 avg_r = (float3)(Q[0][q_row+1][q_col],
                                          Q[2][q_row+1][q_col],
                                          Q[1][q_row+1][q_col]);

            //Slopes of the lower and upper cell, swapped the same way
            const float3 slope_l = (float3)(Qy[0][j][i],
                                            Qy[2][j][i],
                                            Qy[1][j][i]);
            const float3 slope_r = (float3)(Qy[0][j+1][i],
                                            Qy[2][j+1][i],
                                            Qy[1][j+1][i]);

            //Point values at both sides of both cells
            const float3 Q_rl = avg_r - 0.5f*slope_r;
            const float3 Q_rr = avg_r + 0.5f*slope_r;
            const float3 Q_ll = avg_l - 0.5f*slope_l;
            const float3 Q_lr = avg_l + 0.5f*slope_l;

            //Evolve half a timestep (predictor step)
            const float3 Q_r_bar = Q_rl + half_dt_dy * (F_func(Q_rl, g_) - F_func(Q_rr, g_));
            const float3 Q_l_bar = Q_lr + half_dt_dy * (F_func(Q_ll, g_) - F_func(Q_lr, g_));

            //Flux through the face, based on the predicted values
            const float3 face_flux = HLL_flux(Q_l_bar, Q_r_bar, g_);

            //Store in shared memory, swapping hu and hv back to the original order
            G[0][j][i] = face_flux.x;
            G[1][j][i] = face_flux.z;
            G[2][j][i] = face_flux.y;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel void swe_2D(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,

        float theta_,

        int step_,

        //Input h^n
        __global float* h0_ptr_, int h0_pitch_,
        __global float* hu0_ptr_, int hu0_pitch_,
        __global float* hv0_ptr_, int hv0_pitch_,

        //Output h^{n+1}
        __global float* h1_ptr_, int h1_pitch_,
        __global float* hu1_ptr_, int hu1_pitch_,
        __global float* hv1_ptr_, int hv1_pitch_) {

    //Local memory: state, slopes (shared by both directions) and
    //face fluxes (shared by both directions)
    __local float Q[3][block_height+4][block_width+4];
    __local float Qx[3][block_height+2][block_width+2];
    __local float F[3][block_height+1][block_width+1];

    //Load the input state into local memory
    readBlock2(h0_ptr_, h0_pitch_,
               hu0_ptr_, hu0_pitch_,
               hv0_ptr_, hv0_pitch_,
               Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Set boundary conditions
    noFlowBoundary2(Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Two one-dimensional sweeps.
    //step_ == 0 => evolve x first, then y; otherwise y first, then x.
    //step_ is a kernel argument, so every work-item takes the same path
    //through the barriers below.
    for (int sweep=0; sweep<2; ++sweep) {
        //Re-apply boundary conditions between the first and second sweep
        if (sweep == 1) {
            noFlowBoundary2(Q, nx_, ny_);
            barrier(CLK_LOCAL_MEM_FENCE);
        }

        const int along_x = (step_ == 0) ? (sweep == 0) : (sweep == 1);

        if (along_x) {
            //Compute fluxes along the x axis and evolve
            minmodSlopeX(Q, Qx, theta_);
            barrier(CLK_LOCAL_MEM_FENCE);
            computeFluxF(Q, Qx, F, g_, dx_, dt_);
            barrier(CLK_LOCAL_MEM_FENCE);
            evolveF2(Q, F, nx_, ny_, dx_, dt_);
            barrier(CLK_LOCAL_MEM_FENCE);
        }
        else {
            //Compute fluxes along the y axis and evolve
            minmodSlopeY(Q, Qx, theta_);
            barrier(CLK_LOCAL_MEM_FENCE);
            computeFluxG(Q, Qx, F, g_, dy_, dt_);
            barrier(CLK_LOCAL_MEM_FENCE);
            evolveG2(Q, F, nx_, ny_, dy_, dt_);
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }

    // Write to main memory for all internal cells
    writeBlock2(h1_ptr_, h1_pitch_,
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
}
|
||||
156
SWESimulators/HLL_kernel.opencl
Normal file
156
SWESimulators/HLL_kernel.opencl
Normal file
@@ -0,0 +1,156 @@
|
||||
/*
|
||||
This OpenCL kernel implements the HLL flux
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Computes the flux along the x axis for all faces,
  * using the cell values on either side of each face directly.
  */
void computeFluxF(__local float Q[3][block_height+2][block_width+2],
                  __local float F[3][block_height+1][block_width+1],
                  const float g_) {
    //Position and stride of this work-item within the work-group
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    for (int row=first_row; row<block_height; row+=row_stride) {
        const int q_row = row + 1; //Skip ghost cells

        for (int col=first_col; col<block_width+1; col+=col_stride) {
            //Face col lies between Q columns col and col+1
            const float3 left_state  = (float3)(Q[0][q_row][col],
                                                Q[1][q_row][col],
                                                Q[2][q_row][col]);
            const float3 right_state = (float3)(Q[0][q_row][col+1],
                                                Q[1][q_row][col+1],
                                                Q[2][q_row][col+1]);

            const float3 face_flux = HLL_flux(left_state, right_state, g_);

            //Store in local memory
            F[0][row][col] = face_flux.x;
            F[1][row][col] = face_flux.y;
            F[2][row][col] = face_flux.z;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * Computes the flux along the y axis for all faces.
  * The x flux is reused by swapping the second and third component
  * on the way in and swapping them back on the way out.
  */
void computeFluxG(__local float Q[3][block_height+2][block_width+2],
                  __local float G[3][block_height+1][block_width+1],
                  const float g_) {
    //Position and stride of this work-item within the work-group
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    for (int row=first_row; row<block_height+1; row+=row_stride) {
        for (int col=first_col; col<block_width; col+=col_stride) {
            const int q_col = col + 1; //Skip ghost cells

            //Face row lies between Q rows row and row+1.
            //Note the component order 0, 2, 1: hu and hv are swapped ("transposing" the domain)
            const float3 lower_state = (float3)(Q[0][row][q_col],
                                                Q[2][row][q_col],
                                                Q[1][row][q_col]);
            const float3 upper_state = (float3)(Q[0][row+1][q_col],
                                                Q[2][row+1][q_col],
                                                Q[1][row+1][q_col]);

            const float3 face_flux = HLL_flux(lower_state, upper_state, g_);

            //Store in local memory, swapping hu and hv back to the original order
            G[0][row][col] = face_flux.x;
            G[1][row][col] = face_flux.z;
            G[2][row][col] = face_flux.y;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel void swe_2D(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,

        //Input h^n
        __global float* h0_ptr_, int h0_pitch_,
        __global float* hu0_ptr_, int hu0_pitch_,
        __global float* hv0_ptr_, int hv0_pitch_,

        //Output h^{n+1}
        __global float* h1_ptr_, int h1_pitch_,
        __global float* hu1_ptr_, int hu1_pitch_,
        __global float* hv1_ptr_, int hv1_pitch_) {
    //Local memory: state with ghost cells, and face fluxes.
    //F is used first for the x fluxes and then reused for the y fluxes.
    __local float Q[3][block_height+2][block_width+2];
    __local float F[3][block_height+1][block_width+1];

    //Stage 1: load the input state and apply boundary conditions
    readBlock1(h0_ptr_, h0_pitch_,
               hu0_ptr_, hu0_pitch_,
               hv0_ptr_, hv0_pitch_,
               Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    noFlowBoundary1(Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 2: fluxes along x, then evolve Q with them
    computeFluxF(Q, F, g_);
    barrier(CLK_LOCAL_MEM_FENCE);
    evolveF1(Q, F, nx_, ny_, dx_, dt_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 3: boundary conditions again, now on the x-evolved state
    noFlowBoundary1(Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 4: fluxes along y, then evolve Q with them
    computeFluxG(Q, F, g_);
    barrier(CLK_LOCAL_MEM_FENCE);
    evolveG1(Q, F, nx_, ny_, dy_, dt_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 5: write to main memory for all internal cells
    writeBlock1(h1_ptr_, h1_pitch_,
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
}
|
||||
198
SWESimulators/KP07.py
Normal file
198
SWESimulators/KP07.py
Normal file
@@ -0,0 +1,198 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the Kurganov-Petrova numerical scheme
|
||||
for the shallow water equations, described in
|
||||
A. Kurganov & Guergana Petrova
|
||||
A Second-Order Well-Balanced Positivity Preserving Central-Upwind
|
||||
Scheme for the Saint-Venant System Communications in Mathematical
|
||||
Sciences, 5 (2007), 133-160.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the Kurganov-Petrova (KP07) scheme
|
||||
"""
|
||||
class KP07:

    """
    Driver for the swe_2D kernel in KP07_kernel.opencl (Kurganov-Petrova scheme).

    Initialization routine
    cl_ctx: OpenCL context used for the command queue, kernel and device data
    h0: Water depth incl ghost cells
    hu0: Initial momentum along x-axis incl ghost cells
    hv0: Initial momentum along y-axis incl ghost cells
        (ghost_cells_x = ghost_cells_y = 2 is passed to Common.SWEDataArkawaA;
        the exact array shape it expects is defined there)
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    f: Coriolis parameter (1.2e-4 s^-1)
    r: Bottom friction coefficient (2.4e-3 m/s)
    theta: Parameter forwarded to the kernel as theta
        (presumably the minmod limiter parameter - confirm in KP07_kernel.opencl)
    use_rk2: If True every timestep launches the kernel twice (stage 0 and
        stage 1); if False it launches stage 0 only and swaps the buffers
    wind_stress: Object with the attributes listed below. Defaults to a new
        Common.WindStressParams() created for each simulator instance
        type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
        tau0: Amplitude of wind stress (Pa)
        rho: Density of sea water (1025.0 kg / m^3)
        alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
        xm: Maximum wind stress for bell shaped wind stress
        Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
        x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
        y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
        u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
        v0: Translation speed along y for moving cyclone (-0.5*u0)
    block_width: Work-group size along x, also passed to Common.get_kernel
    block_height: Work-group size along y, also passed to Common.get_kernel
    """
    def __init__(self, \
                 cl_ctx, \
                 h0, hu0, hv0, \
                 nx, ny, \
                 dx, dy, dt, \
                 g, f=0.0, r=0.0, \
                 theta=1.3, use_rk2=True,
                 wind_stress=None, \
                 block_width=16, block_height=16):
        self.cl_ctx = cl_ctx

        #A default argument is evaluated only once, when the function is
        #defined, so build the default wind stress here to avoid sharing
        #one parameter object between all simulators
        if wind_stress is None:
            wind_stress = Common.WindStressParams()

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.kp07_kernel = Common.get_kernel(self.cl_ctx, "KP07_kernel.opencl", block_width, block_height)

        #Create data by uploading to device
        ghost_cells_x = 2
        ghost_cells_y = 2
        self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.f = np.float32(f)
        self.r = np.float32(r)
        self.theta = np.float32(theta)
        self.use_rk2 = use_rk2
        self.wind_stress = wind_stress

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        #(global size is nx, ny rounded up to a whole number of work-groups)
        self.local_size = (block_width, block_height)
        self.global_size = ( \
                       int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
                       int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
                      )

    """
    Launches swe_2D once.
    local_dt: Timestep for this launch (np.float32)
    rk_step: Stage flag passed to the kernel (0 or 1)
    read_from_0: If True the kernel reads h0/hu0/hv0 and writes h1/hu1/hv1,
        otherwise it reads h1/hu1/hv1 and writes h0/hu0/hv0
    """
    def _launch_swe_2D(self, local_dt, rk_step, read_from_0):
        data = self.cl_data
        buffers_0 = (data.h0, data.hu0, data.hv0)
        buffers_1 = (data.h1, data.hu1, data.hv1)
        if read_from_0:
            source, target = buffers_0, buffers_1
        else:
            source, target = buffers_1, buffers_0

        #Argument order must match the kernel signature
        args = [self.nx, self.ny, \
                self.dx, self.dy, local_dt, \
                self.g, \
                self.theta, \
                self.f, \
                self.r, \
                np.int32(rk_step)]
        for buf in source + target:
            args.append(buf.data)
            args.append(buf.pitch)
        wind = self.wind_stress
        args.extend([wind.type, \
                     wind.tau0, wind.rho, wind.alpha, wind.xm, wind.Rc, \
                     wind.x0, wind.y0, \
                     wind.u0, wind.v0, \
                     self.t])

        self.kp07_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, *args)

    """
    Function which steps the simulation t_end seconds forward,
    using timesteps of at most self.dt. Returns the accumulated
    simulation time self.t.
    """
    def step(self, t_end=0.0):
        n = int(t_end / self.dt + 1)

        for i in range(0, n):
            #The last timestep is shortened so that we end exactly at t_end
            local_dt = np.float32(min(self.dt, t_end-i*self.dt))

            if (local_dt <= 0.0):
                break

            if (self.use_rk2):
                #Stage 0 writes to buffer set 1, stage 1 writes back to
                #buffer set 0, so no swap is needed afterwards
                self._launch_swe_2D(local_dt, 0, True)
                self._launch_swe_2D(local_dt, 1, False)
            else:
                #Single launch: the result is in buffer set 1, so swap
                self._launch_swe_2D(local_dt, 0, True)
                self.cl_data.swap()

            self.t += local_dt

        return self.t

    """
    Returns the result of self.cl_data.download for this simulator's queue
    """
    def download(self):
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
153
SWESimulators/KP07_dimsplit.py
Normal file
153
SWESimulators/KP07_dimsplit.py
Normal file
@@ -0,0 +1,153 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the Kurganov-Petrova numerical scheme
|
||||
for the shallow water equations, described in
|
||||
A. Kurganov & Guergana Petrova
|
||||
A Second-Order Well-Balanced Positivity Preserving Central-Upwind
|
||||
Scheme for the Saint-Venant System Communications in Mathematical
|
||||
Sciences, 5 (2007), 133-160.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the dimensionally split KP07 scheme
|
||||
"""
|
||||
class KP07_dimsplit:

    """
    Driver for the swe_2D kernel in KP07_dimsplit_kernel.opencl.

    Initialization routine
    cl_ctx: OpenCL context used for the command queue, kernel and device data
    h0: Water depth incl ghost cells
    hu0: Initial momentum along x-axis incl ghost cells
    hv0: Initial momentum along y-axis incl ghost cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational acceleration (9.81 m/s^2)
    theta: Parameter forwarded to the kernel, which hands it to its minmod slope routines
    block_width: Work-group size along x, also passed to Common.get_kernel
    block_height: Work-group size along y, also passed to Common.get_kernel
    """
    def __init__(self,
                 cl_ctx,
                 h0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g,
                 theta=1.3,
                 block_width=16, block_height=16):
        self.cl_ctx = cl_ctx

        #Command queue and compiled kernel for this context
        self.cl_queue = cl.CommandQueue(self.cl_ctx)
        self.swe_kernel = Common.get_kernel(self.cl_ctx, "KP07_dimsplit_kernel.opencl", block_width, block_height)

        #Upload the initial conditions, with two ghost cells in each direction
        ghost_cells_x = 2
        ghost_cells_y = 2
        self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)

        #Store the parameters in the exact data types the OpenCL kernel expects
        self.nx, self.ny = np.int32(nx), np.int32(ny)
        self.dx, self.dy = np.float32(dx), np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.theta = np.float32(theta)

        #Simulation time starts at zero
        self.t = np.float32(0.0)

        #Launch configuration: global size is nx, ny rounded up to a whole
        #number of work-groups
        self.local_size = (block_width, block_height)
        groups_x = np.ceil(self.nx / float(self.local_size[0]))
        groups_y = np.ceil(self.ny / float(self.local_size[1]))
        self.global_size = (int(groups_x * self.local_size[0]),
                            int(groups_y * self.local_size[1]))

    """
    Function which steps the simulation t_end seconds forward.
    Timesteps are taken in pairs; returns the accumulated time self.t.
    """
    def step(self, t_end=0.0):
        n = int(t_end / (2.0*self.dt) + 1)

        for i in range(0, n):
            #Dimensional splitting: second order accurate for every other timestep,
            #thus run two timesteps in a go

            #Each of the two launches gets half of what remains, at most self.dt
            local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
            if (local_dt <= 0.0):
                break

            #Flag 0: along X, then Y. Flag 1: along Y, then X.
            #The buffers are swapped after each launch so the second launch
            #reads what the first one wrote.
            for sweep_order in (0, 1):
                data = self.cl_data
                self.swe_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size,
                        self.nx, self.ny,
                        self.dx, self.dy, local_dt,
                        self.g,
                        self.theta,
                        np.int32(sweep_order),
                        data.h0.data, data.h0.pitch,
                        data.hu0.data, data.hu0.pitch,
                        data.hv0.data, data.hv0.pitch,
                        data.h1.data, data.h1.pitch,
                        data.hu1.data, data.hu1.pitch,
                        data.hv1.data, data.hv1.pitch)
                self.cl_data.swap()

            self.t += 2.0*local_dt

        return self.t

    """
    Returns the result of self.cl_data.download for this simulator's queue
    """
    def download(self):
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
217
SWESimulators/KP07_dimsplit_kernel.opencl
Normal file
217
SWESimulators/KP07_dimsplit_kernel.opencl
Normal file
@@ -0,0 +1,217 @@
|
||||
/*
|
||||
This OpenCL kernel implements the Kurganov-Petrova numerical scheme
|
||||
for the shallow water equations, described in
|
||||
A. Kurganov & Guergana Petrova
|
||||
A Second-Order Well-Balanced Positivity Preserving Central-Upwind
|
||||
Scheme for the Saint-Venant System Communications in Mathematical
|
||||
Sciences, 5 (2007), 133-160.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
|
||||
/**
  * Computes the flux along the x axis for all faces:
  * reconstruct point values from the slopes, evolve them half a
  * timestep, then evaluate the central-upwind flux.
  */
void computeFluxF(__local float Q[3][block_height+4][block_width+4],
                  __local float Qx[3][block_height+2][block_width+2],
                  __local float F[3][block_height+1][block_width+1],
                  const float g_, const float dx_, const float dt_) {
    //Position and stride of this work-item within the work-group
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    //Loop-invariant scaling of the half-timestep predictor
    const float half_dt_dx = dt_/(2.0f*dx_);

    for (int row=first_row; row<block_height; row+=row_stride) {
        const int q_row = row + 2; //Skip ghost cells

        for (int col=first_col; col<block_width+1; col+=col_stride) {
            const int q_col = col + 1;

            //Cell value and slope on the left (col) and right (col+1) side of the face
            const float3 cell_l  = (float3)(Q[0][q_row][q_col],
                                            Q[1][q_row][q_col],
                                            Q[2][q_row][q_col]);
            const float3 slope_l = (float3)(Qx[0][row][col],
                                            Qx[1][row][col],
                                            Qx[2][row][col]);
            const float3 cell_r  = (float3)(Q[0][q_row][q_col+1],
                                            Q[1][q_row][q_col+1],
                                            Q[2][q_row][q_col+1]);
            const float3 slope_r = (float3)(Qx[0][row][col+1],
                                            Qx[1][row][col+1],
                                            Qx[2][row][col+1]);

            //Reconstructed point values at both edges of each of the two cells
            const float3 right_lo = cell_r - 0.5f*slope_r;
            const float3 right_hi = cell_r + 0.5f*slope_r;
            const float3 left_lo  = cell_l - 0.5f*slope_l;
            const float3 left_hi  = cell_l + 0.5f*slope_l;

            //Predictor: evolve the two values adjacent to the face half a timestep
            const float3 right_pred = right_lo + half_dt_dx * (F_func(right_lo, g_) - F_func(right_hi, g_));
            const float3 left_pred  = left_hi  + half_dt_dx * (F_func(left_lo, g_)  - F_func(left_hi, g_));

            //Numerical flux across the face from the predicted states
            const float3 face_flux = CentralUpwindFlux(left_pred, right_pred, g_);

            //Store in local memory
            F[0][row][col] = face_flux.x;
            F[1][row][col] = face_flux.y;
            F[2][row][col] = face_flux.z;
        }
    }
}
|
||||
|
||||
/**
  * Computes the flux along the y axis for all faces.
  * The x flux function is reused by swapping the second and third
  * component on the way in and swapping them back on the way out.
  */
void computeFluxG(__local float Q[3][block_height+4][block_width+4],
                  __local float Qy[3][block_height+2][block_width+2],
                  __local float G[3][block_height+1][block_width+1],
                  const float g_, const float dy_, const float dt_) {
    //Position and stride of this work-item within the work-group
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    //Loop-invariant scaling of the half-timestep predictor
    const float half_dt_dy = dt_/(2.0f*dy_);

    for (int row=first_row; row<block_height+1; row+=row_stride) {
        const int q_row = row + 1;

        for (int col=first_col; col<block_width; col+=col_stride) {
            const int q_col = col + 2; //Skip ghost cells

            //Cell value and slope on either side of the face (rows row and row+1).
            //Note the component order 0, 2, 1: hu and hv are swapped ("transposing" the domain)
            const float3 cell_l  = (float3)(Q[0][q_row][q_col],
                                            Q[2][q_row][q_col],
                                            Q[1][q_row][q_col]);
            const float3 slope_l = (float3)(Qy[0][row][col],
                                            Qy[2][row][col],
                                            Qy[1][row][col]);
            const float3 cell_r  = (float3)(Q[0][q_row+1][q_col],
                                            Q[2][q_row+1][q_col],
                                            Q[1][q_row+1][q_col]);
            const float3 slope_r = (float3)(Qy[0][row+1][col],
                                            Qy[2][row+1][col],
                                            Qy[1][row+1][col]);

            //Reconstructed point values at both edges of each of the two cells
            const float3 right_lo = cell_r - 0.5f*slope_r;
            const float3 right_hi = cell_r + 0.5f*slope_r;
            const float3 left_lo  = cell_l - 0.5f*slope_l;
            const float3 left_hi  = cell_l + 0.5f*slope_l;

            //Predictor: evolve the two values adjacent to the face half a timestep
            const float3 right_pred = right_lo + half_dt_dy * (F_func(right_lo, g_) - F_func(right_hi, g_));
            const float3 left_pred  = left_hi  + half_dt_dy * (F_func(left_lo, g_)  - F_func(left_hi, g_));

            //Numerical flux across the face from the predicted states
            const float3 face_flux = CentralUpwindFlux(left_pred, right_pred, g_);

            //Store in local memory, swapping hu and hv back to the original order
            G[0][row][col] = face_flux.x;
            G[1][row][col] = face_flux.z;
            G[2][row][col] = face_flux.y;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
  * This dimensionally split kernel evolves the solution one timestep
  * using two one-dimensional sweeps. step_ selects the sweep order:
  * 0 => x first, then y; anything else => y first, then x.
  */
__kernel void swe_2D(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,

        float theta_,

        int step_,

        //Input h^n
        __global float* h0_ptr_, int h0_pitch_,
        __global float* hu0_ptr_, int hu0_pitch_,
        __global float* hv0_ptr_, int hv0_pitch_,

        //Output h^{n+1}
        __global float* h1_ptr_, int h1_pitch_,
        __global float* hu1_ptr_, int hu1_pitch_,
        __global float* hv1_ptr_, int hv1_pitch_) {

    //Local memory: state, slopes (shared by both directions) and
    //face fluxes (shared by both directions)
    __local float Q[3][block_height+4][block_width+4];
    __local float Qx[3][block_height+2][block_width+2];
    __local float F[3][block_height+1][block_width+1];

    //Load the input state into local memory
    readBlock2(h0_ptr_, h0_pitch_,
               hu0_ptr_, hu0_pitch_,
               hv0_ptr_, hv0_pitch_,
               Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Fix boundary conditions
    noFlowBoundary2(Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //step_ is a kernel argument, so every work-item takes the same path
    //through the barriers below.
    for (int sweep=0; sweep<2; ++sweep) {
        //Re-apply boundary conditions between the first and second sweep
        if (sweep == 1) {
            noFlowBoundary2(Q, nx_, ny_);
            barrier(CLK_LOCAL_MEM_FENCE);
        }

        const int along_x = (step_ == 0) ? (sweep == 0) : (sweep == 1);

        if (along_x) {
            //Compute fluxes along the x axis and evolve
            minmodSlopeX(Q, Qx, theta_);
            barrier(CLK_LOCAL_MEM_FENCE);
            computeFluxF(Q, Qx, F, g_, dx_, dt_);
            barrier(CLK_LOCAL_MEM_FENCE);
            evolveF2(Q, F, nx_, ny_, dx_, dt_);
            barrier(CLK_LOCAL_MEM_FENCE);
        }
        else {
            //Compute fluxes along the y axis and evolve
            minmodSlopeY(Q, Qx, theta_);
            barrier(CLK_LOCAL_MEM_FENCE);
            computeFluxG(Q, Qx, F, g_, dy_, dt_);
            barrier(CLK_LOCAL_MEM_FENCE);
            evolveG2(Q, F, nx_, ny_, dy_, dt_);
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }

    // Write to main memory for all internal cells
    writeBlock2(h1_ptr_, h1_pitch_,
                hu1_ptr_, hu1_pitch_,
                hv1_ptr_, hv1_pitch_,
                Q, nx_, ny_);
}
|
||||
236
SWESimulators/KP07_kernel.opencl
Normal file
236
SWESimulators/KP07_kernel.opencl
Normal file
@@ -0,0 +1,236 @@
|
||||
/*
|
||||
This OpenCL kernel implements the Kurganov-Petrova numerical scheme
|
||||
for the shallow water equations, described in
|
||||
A. Kurganov & Guergana Petrova
|
||||
A Second-Order Well-Balanced Positivity Preserving Central-Upwind
|
||||
Scheme for the Saint-Venant System Communications in Mathematical
|
||||
Sciences, 5 (2007), 133-160.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
|
||||
/**
  * Computes the flux along the x axis for all faces, from the values
  * reconstructed at the face out of the cell values and x slopes.
  */
void computeFluxF(__local float Q[3][block_height+4][block_width+4],
                  __local float Qx[3][block_height+2][block_width+2],
                  __local float F[3][block_height+1][block_width+1],
                  const float g_) {
    //Position and stride of this work-item within the work-group
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    for (int row=first_row; row<block_height; row+=row_stride) {
        const int q_row = row + 2; //Skip ghost cells

        for (int col=first_col; col<block_width+1; col+=col_stride) {
            const int q_col = col + 1;

            //Cell value and slope on the left (col) and right (col+1) side of the face
            const float3 cell_l  = (float3)(Q[0][q_row][q_col],
                                            Q[1][q_row][q_col],
                                            Q[2][q_row][q_col]);
            const float3 slope_l = (float3)(Qx[0][row][col],
                                            Qx[1][row][col],
                                            Qx[2][row][col]);
            const float3 cell_r  = (float3)(Q[0][q_row][q_col+1],
                                            Q[1][q_row][q_col+1],
                                            Q[2][q_row][q_col+1]);
            const float3 slope_r = (float3)(Qx[0][row][col+1],
                                            Qx[1][row][col+1],
                                            Qx[2][row][col+1]);

            // Q at interface from the right and left
            const float3 from_right = cell_r - 0.5f*slope_r;
            const float3 from_left  = cell_l + 0.5f*slope_l;

            // Computed flux
            const float3 face_flux = CentralUpwindFlux(from_left, from_right, g_);
            F[0][row][col] = face_flux.x;
            F[1][row][col] = face_flux.y;
            F[2][row][col] = face_flux.z;
        }
    }
}
|
||||
|
||||
/**
  * Fills G with the flux returned by CentralUpwindFlux for every y-direction
  * face handled by this work-group. The hu and hv components are swapped
  * before the call and swapped back when the result is stored, so the same
  * flux function serves both directions.
  */
void computeFluxG(__local float Q[3][block_height+4][block_width+4],
                  __local float Qy[3][block_height+2][block_width+2],
                  __local float G[3][block_height+1][block_width+1],
                  const float g_) {
    //Each work-item starts at its local id and strides by the work-group size
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    for (int face = first_row; face < block_height+1; face += row_stride) {
        //Rows of Q on either side of this face
        const int q_lower = face + 1;
        const int q_upper = q_lower + 1;

        for (int col = first_col; col < block_width; col += col_stride) {
            //Columns of Q are offset by two relative to G/Qy (skip ghost cells)
            const int q_col = col + 2;

            //State on the upper side of the face (components 1 and 2 swapped)
            const float3 Qp = (float3)(Q[0][q_upper][q_col] - 0.5f*Qy[0][face+1][col],
                                       Q[2][q_upper][q_col] - 0.5f*Qy[2][face+1][col],
                                       Q[1][q_upper][q_col] - 0.5f*Qy[1][face+1][col]);

            //State on the lower side of the face (components 1 and 2 swapped)
            const float3 Qm = (float3)(Q[0][q_lower][q_col] + 0.5f*Qy[0][face][col],
                                       Q[2][q_lower][q_col] + 0.5f*Qy[2][face][col],
                                       Q[1][q_lower][q_col] + 0.5f*Qy[1][face][col]);

            //Undo the swap when storing: flux.z belongs to hu, flux.y to hv
            const float3 flux = CentralUpwindFlux(Qm, Qp, g_);
            G[0][face][col] = flux.x;
            G[1][face][col] = flux.z;
            G[2][face][col] = flux.y;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* This unsplit kernel computes the 2D numerical scheme with a TVD RK2 time integration scheme
|
||||
*/
|
||||
__kernel void swe_2D(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,

        float theta_,

        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        int step_,

        //Input h^n
        __global float* h0_ptr_, int h0_pitch_,
        __global float* hu0_ptr_, int hu0_pitch_,
        __global float* hv0_ptr_, int hv0_pitch_,

        //Output h^{n+1}
        __global float* h1_ptr_, int h1_pitch_,
        __global float* hu1_ptr_, int hu1_pitch_,
        __global float* hv1_ptr_, int hv1_pitch_,

        //Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Position of this work-item inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Position of the cell inside the domain; +2 skips the global ghost cells
    const int ti = get_global_id(0) + 2;
    const int tj = get_global_id(1) + 2;

    //Local-memory working set
    __local float Q[3][block_height+4][block_width+4];

    //The following slightly wastes memory, but enables us to reuse the
    //functions in common.opencl
    __local float Qx[3][block_height+2][block_width+2];
    __local float Qy[3][block_height+2][block_width+2];
    __local float F[3][block_height+1][block_width+1];
    __local float G[3][block_height+1][block_width+1];

    //Stage 1: load the block into local memory
    readBlock2(h0_ptr_, h0_pitch_,
               hu0_ptr_, hu0_pitch_,
               hv0_ptr_, hv0_pitch_,
               Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 2: apply the boundary conditions
    noFlowBoundary2(Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 3: reconstruct slopes along the x and y axis
    minmodSlopeX(Q, Qx, theta_);
    minmodSlopeY(Q, Qy, theta_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 4: compute fluxes along the x and y axis
    computeFluxF(Q, Qx, F, g_);
    computeFluxG(Q, Qy, G, g_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Only internal cells are updated. No barrier follows, so work-items
    //outside the interior can leave here.
    if (ti <= 1 || ti >= nx_+2 || tj <= 1 || tj >= ny_+2) {
        return;
    }

    //Index of this cell in Q; +2 skips the local ghost cells
    const int i = tx + 2;
    const int j = ty + 2;

    const float X = windStressX(
            wind_stress_type_,
            dx_, dy_, dt_,
            tau0_, rho_, alpha_, xm_, Rc_,
            x0_, y0_,
            u0_, v0_,
            t_);
    const float Y = windStressY(
            wind_stress_type_,
            dx_, dy_, dt_,
            tau0_, rho_, alpha_, xm_, Rc_,
            x0_, y0_,
            u0_, v0_,
            t_);

    //Flux-difference update, identical for the three conserved variables
    float Q1[3];
    for (int c = 0; c < 3; ++c) {
        Q1[c] = Q[c][j][i] + (F[c][ty][tx] - F[c][ty  ][tx+1]) * dt_ / dx_
                           + (G[c][ty][tx] - G[c][ty+1][tx  ]) * dt_ / dy_;
    }

    //Add wind stress and Coriolis source terms to the momentum components.
    //NOTE(review): Coriolis enters as -f*hv for hu and +f*hu for hv here;
    //confirm this sign convention against the intended formulation.
    const float h1  = Q1[0];
    const float hu1 = Q1[1] + dt_*X - dt_*f_*Q[2][j][i];
    const float hv1 = Q1[2] + dt_*Y + dt_*f_*Q[1][j][i];

    //Rows of the output buffers (pitch is given in bytes)
    __global float* const h_row  = (__global float*) ((__global char*) h1_ptr_ + h1_pitch_*tj);
    __global float* const hu_row = (__global float*) ((__global char*) hu1_ptr_ + hu1_pitch_*tj);
    __global float* const hv_row = (__global float*) ((__global char*) hv1_ptr_ + hv1_pitch_*tj);

    //Bottom friction factor, based on the water depth read from Q
    const float C = 2.0f*r_*dt_/Q[0][j][i];

    if (step_ == 0) {
        //First step of RK2 ODE integrator
        const float damping = 1.0f + C;

        h_row[ti]  = h1;
        hu_row[ti] = hu1 / damping;
        hv_row[ti] = hv1 / damping;
    }
    else if (step_ == 1) {
        //Second step of RK2 ODE integrator: the output buffers are read
        //first (the original comment calls these values Q^n) and averaged
        //with the update computed above
        const float h_a  = h_row[ti];
        const float hu_a = hu_row[ti];
        const float hv_a = hv_row[ti];

        //Compute Q^n+1
        const float h_b  = 0.5f*(h_a + h1);
        const float hu_b = 0.5f*(hu_a + hu1);
        const float hv_b = 0.5f*(hv_a + hv1);

        //Write to main memory
        const float damping = 1.0f + 0.5f*C;

        h_row[ti]  = h_b;
        hu_row[ti] = hu_b / damping;
        hv_row[ti] = hv_b / damping;
    }
}
|
||||
133
SWESimulators/LxF.py
Normal file
133
SWESimulators/LxF.py
Normal file
@@ -0,0 +1,133 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the classical Lax-Friedrichs numerical
|
||||
scheme for the shallow water equations
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the classical Lax-Friedrichs scheme
|
||||
"""
|
||||
class LxF:
    """
    Shallow water simulator that advances the solution with the swe_2D
    kernel from LxF_kernel.opencl (Lax-Friedrichs scheme).
    """

    def __init__(self,
                 cl_ctx,
                 h0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g,
                 block_width=16, block_height=16):
        """
        Initialization routine
        cl_ctx: OpenCL context used for the command queue, kernel and data
        h0: Water depth incl ghost cells (the original note says
            (nx+1)*(ny+1) cells; one ghost cell is requested per direction
            below -- TODO confirm the expected shape against
            Common.SWEDataArkawaA)
        hu0: Initial momentum along x-axis incl ghost cells
        hv0: Initial momentum along y-axis incl ghost cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        block_width, block_height: OpenCL work-group size
        """
        self.cl_ctx = cl_ctx

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.lxf_kernel = Common.get_kernel(self.cl_ctx, "LxF_kernel.opencl", block_width, block_height)

        #Create data by uploading to device
        ghost_cells_x = 1
        ghost_cells_y = 1
        self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)

        #Kernel arguments must be passed as fixed-width numpy scalars
        self.nx, self.ny = np.int32(nx), np.int32(ny)
        self.dx, self.dy = np.float32(dx), np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)

        #Initialize time
        self.t = np.float32(0.0)

        #Launch configuration: round the domain size up to a whole number
        #of work-groups in each direction
        self.local_size = (block_width, block_height)
        self.global_size = tuple(
            int(np.ceil(cells / float(block)) * block)
            for cells, block in zip((self.nx, self.ny), self.local_size)
        )

    def step(self, t_end=0.0):
        """
        Advances the simulation by t_end, using timesteps of at most self.dt,
        and returns the accumulated simulation time self.t
        """
        max_steps = int(t_end / self.dt + 1)

        for step_index in range(max_steps):
            #The last step is shortened so that we land exactly on t_end
            remaining = t_end - step_index*self.dt
            local_dt = np.float32(min(self.dt, remaining))

            if (local_dt <= 0.0):
                break

            #Each buffer is passed as a (data, pitch) pair: inputs first,
            #then outputs
            data = self.cl_data
            buffer_args = []
            for buf in (data.h0, data.hu0, data.hv0, data.h1, data.hu1, data.hv1):
                buffer_args.extend((buf.data, buf.pitch))

            self.lxf_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size,
                                   self.nx, self.ny,
                                   self.dx, self.dy, local_dt,
                                   self.g,
                                   *buffer_args)

            self.t += local_dt

            #The output of this step is the input of the next one
            self.cl_data.swap()

        return self.t

    def download(self):
        """
        Returns whatever self.cl_data.download yields for our command queue
        """
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
158
SWESimulators/LxF_kernel.opencl
Normal file
158
SWESimulators/LxF_kernel.opencl
Normal file
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
This OpenCL kernel implements the classical Lax-Friedrichs scheme
|
||||
for the shallow water equations, with edge fluxes.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
/**
|
||||
* Computes the flux along the x axis for all faces
|
||||
*/
|
||||
void computeFluxF(__local float Q[3][block_height+2][block_width+2],
                  __local float F[3][block_height][block_width+1],
                  const float g_, const float dx_, const float dt_) {
    //Each work-item starts at its local id and strides by the work-group size
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    for (int row = first_row; row < block_height; row += row_stride) {
        //Rows of Q are offset by one relative to F (skip ghost cells)
        const int q_row = row + 1;

        for (int face = first_col; face < block_width+1; face += col_stride) {
            //Face `face` uses Q columns `face` (left) and `face+1` (right)
            const int q_left  = face;
            const int q_right = face + 1;

            const float3 Qm = (float3)(Q[0][q_row][q_left],
                                       Q[1][q_row][q_left],
                                       Q[2][q_row][q_left]);
            const float3 Qp = (float3)(Q[0][q_row][q_right],
                                       Q[1][q_row][q_right],
                                       Q[2][q_row][q_right]);

            //Store the three flux components for this face
            const float3 flux = LxF_2D_flux(Qm, Qp, g_, dx_, dt_);
            F[0][row][face] = flux.x;
            F[1][row][face] = flux.y;
            F[2][row][face] = flux.z;
        }
    }
}
|
||||
|
||||
|
||||
/**
|
||||
* Computes the flux along the y axis for all faces
|
||||
*/
|
||||
void computeFluxG(__local float Q[3][block_height+2][block_width+2],
                  __local float G[3][block_height+1][block_width],
                  const float g_, const float dy_, const float dt_) {
    //Each work-item starts at its local id and strides by the work-group size
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    for (int face = first_row; face < block_height+1; face += row_stride) {
        //Face `face` uses Q rows `face` (lower) and `face+1` (upper)
        const int q_lower = face;
        const int q_upper = face + 1;

        for (int col = first_col; col < block_width; col += col_stride) {
            //Columns of Q are offset by one relative to G (skip ghost cells)
            const int q_col = col + 1;

            //hu and hv are swapped so that the same flux function can be
            //used for the y direction
            const float3 Qm = (float3)(Q[0][q_lower][q_col],
                                       Q[2][q_lower][q_col],
                                       Q[1][q_lower][q_col]);
            const float3 Qp = (float3)(Q[0][q_upper][q_col],
                                       Q[2][q_upper][q_col],
                                       Q[1][q_upper][q_col]);

            //Undo the swap when storing: flux.z belongs to hu, flux.y to hv
            const float3 flux = LxF_2D_flux(Qm, Qp, g_, dy_, dt_);
            G[0][face][col] = flux.x;
            G[1][face][col] = flux.z;
            G[2][face][col] = flux.y;
        }
    }
}
|
||||
|
||||
|
||||
__kernel void swe_2D(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,

        //Input h^n
        __global float* h0_ptr_, int h0_pitch_,
        __global float* hu0_ptr_, int hu0_pitch_,
        __global float* hv0_ptr_, int hv0_pitch_,

        //Output h^{n+1}
        __global float* h1_ptr_, int h1_pitch_,
        __global float* hu1_ptr_, int hu1_pitch_,
        __global float* hv1_ptr_, int hv1_pitch_) {

    //Local-memory working set: the block with ghost cells, and the face fluxes
    __local float Q[3][block_height+2][block_width+2];
    __local float F[3][block_height][block_width+1];
    __local float G[3][block_height+1][block_width];

    //Stage 1: load the block into local memory
    readBlock1(h0_ptr_, h0_pitch_,
               hu0_ptr_, hu0_pitch_,
               hv0_ptr_, hv0_pitch_,
               Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 2: set boundary conditions
    noFlowBoundary1(Q, nx_, ny_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Stage 3: compute fluxes along the x and y axis
    computeFluxF(Q, F, g_, dx_, dt_);
    computeFluxG(Q, G, g_, dy_, dt_);
    barrier(CLK_LOCAL_MEM_FENCE);

    //Position of the cell inside the domain; +1 skips the global ghost cells
    const int ti = get_global_id(0) + 1;
    const int tj = get_global_id(1) + 1;

    //Only internal cells are evolved. No barrier follows, so work-items
    //outside the interior can leave here.
    if (ti <= 0 || ti >= nx_+1 || tj <= 0 || tj >= ny_+1) {
        return;
    }

    //Position of this work-item inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Index of this cell in Q; +1 skips the local ghost cells
    const int i = tx + 1;
    const int j = ty + 1;

    //Flux-difference update, identical for the three conserved variables
    float Q1[3];
    for (int c = 0; c < 3; ++c) {
        Q1[c] = Q[c][j][i] + (F[c][ty][tx] - F[c][ty  ][tx+1]) * dt_ / dx_
                           + (G[c][ty][tx] - G[c][ty+1][tx  ]) * dt_ / dy_;
    }

    //Rows of the output buffers (pitch is given in bytes)
    __global float* const h_row  = (__global float*) ((__global char*) h1_ptr_ + h1_pitch_*tj);
    __global float* const hu_row = (__global float*) ((__global char*) hu1_ptr_ + hu1_pitch_*tj);
    __global float* const hv_row = (__global float*) ((__global char*) hv1_ptr_ + hv1_pitch_*tj);

    h_row[ti]  = Q1[0];
    hu_row[ti] = Q1[1];
    hv_row[ti] = Q1[2];
}
|
||||
165
SWESimulators/PlotHelper.py
Normal file
165
SWESimulators/PlotHelper.py
Normal file
@@ -0,0 +1,165 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python class aids in plotting results from the numerical
|
||||
simulations
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
|
||||
from matplotlib import pyplot as plt
|
||||
import matplotlib.gridspec as gridspec
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
"""
|
||||
Class that makes plotting faster by caching the plots instead of recreating them
|
||||
"""
|
||||
class PlotHelper:
    """
    Builds a grid of plots once and keeps the resulting matplotlib artists,
    so that later calls to plot() only replace the data of those artists.
    """

    def __init__(self, fig, x_coords, y_coords, radius, eta1, u1, v1, eta2=None, u2=None, v2=None, interpolation_type='spline36'):
        """
        Creates all subplots in fig.
        fig: matplotlib figure to draw into (resized to 15x15)
        x_coords, y_coords: 2D coordinate arrays, indexed like eta1
        radius: array with one radius per cell, used for the radial plots
        eta1, u1, v1: 2D fields of the first data set; the shape of eta1
                      defines self.ny and self.nx
        eta2, u2, v2: optional second data set; if eta2 is given a third
                      row of profile plots is added (u2 and v2 must then
                      be given too, although only eta2 is plotted)
        interpolation_type: interpolation passed on to imshow
        """
        self.ny, self.nx = eta1.shape
        self.fig = fig

        fig.set_figheight(15)
        fig.set_figwidth(15)

        min_x = np.min(x_coords[:,0])
        min_y = np.min(y_coords[0,:])

        max_x = np.max(x_coords[0,:])
        max_y = np.max(y_coords[:,0])

        domain_extent = [ x_coords[0, 0], x_coords[0, -1], y_coords[0, 0], y_coords[-1, 0] ]

        if (eta2 is not None):
            assert(u2 is not None)
            assert(v2 is not None)
            self.gs = gridspec.GridSpec(3, 3)
        else:
            self.gs = gridspec.GridSpec(2, 3)

        #Top row: images of the three fields
        self.sp_eta = self._add_image(0, 'Eta', eta1, 0.05, domain_extent, interpolation_type)
        self.sp_u = self._add_image(1, 'U', u1, 1.5, domain_extent, interpolation_type)
        self.sp_v = self._add_image(2, 'V', v1, 1.5, domain_extent, interpolation_type)

        radial_limits = [0, min(max_x, max_y), -1.5, 1]
        profile_limits = [max(min_x, min_y), min(max_x, max_y), -1.5, 1]

        #Second row: profiles of eta1
        (self.sp_radial1,
         self.sp_x_axis1, self.sp_y_axis1,
         self.sp_x_diag1, self.sp_y_diag1) = self._add_profile_row(
                1, 'Eta', x_coords, y_coords, radius, eta1, radial_limits, profile_limits)

        #Optional third row: profiles of eta2
        if (eta2 is not None):
            (self.sp_radial2,
             self.sp_x_axis2, self.sp_y_axis2,
             self.sp_x_diag2, self.sp_y_diag2) = self._add_profile_row(
                    2, 'Eta2', x_coords, y_coords, radius, eta2, radial_limits, profile_limits)

    def _mid_row(self):
        """Index of the row used for the x-axis profile (floor division: must be an int)"""
        return self.ny // 2

    def _mid_col(self):
        """Index of the column used for the y-axis profile (floor division: must be an int)"""
        return self.nx // 2

    def _x_diag_offset(self):
        """Diagonal offset for the 'x = -y' profile; floors the negated difference"""
        return -abs(self.nx - self.ny) // 2

    def _y_diag_offset(self):
        """Diagonal offset for the 'x = y' profile (taken on the transposed arrays)"""
        return abs(self.nx - self.ny) // 2

    def _add_image(self, column, title, field, limit, extent, interpolation_type):
        """Adds one image with colorbar to the top row, returns the image artist"""
        ax = self.fig.add_subplot(self.gs[0, column])
        #'lower' is the valid matplotlib spelling of the former 'bottom'
        image = ax.imshow(field, interpolation=interpolation_type, origin='lower', vmin=-limit, vmax=limit, extent=extent)
        ax.axis('tight')
        ax.set_aspect('equal')
        ax.set_title(title)
        self.fig.colorbar(image, ax=ax)
        return image

    def _add_profile_row(self, row, label, x_coords, y_coords, radius, eta, radial_limits, profile_limits):
        """
        Adds the radial, along-axis and along-diagonal plots of eta in the
        given grid row, returns the five line artists
        (radial, x_axis, y_axis, x_diag, y_diag)
        """
        mid_row = self._mid_row()
        mid_col = self._mid_col()
        x_offset = self._x_diag_offset()
        y_offset = self._y_diag_offset()

        ax = self.fig.add_subplot(self.gs[row, 0])
        radial, = ax.plot(radius.ravel(), eta.ravel(), '.')
        ax.axis(radial_limits)
        ax.set_title(label + ' Radial plot')

        ax = self.fig.add_subplot(self.gs[row, 1])
        x_axis, = ax.plot(x_coords[mid_row,:], eta[mid_row,:], 'k+--', label='x-axis')
        y_axis, = ax.plot(y_coords[:,mid_col], eta[:,mid_col], 'kx:', label='y-axis')
        ax.axis(profile_limits)
        ax.set_title(label + ' along axis')
        ax.legend()

        ax = self.fig.add_subplot(self.gs[row, 2])
        x_diag, = ax.plot(1.41*np.diagonal(x_coords, offset=x_offset),
                          np.diagonal(eta, offset=x_offset),
                          'k+--', label='x = -y')
        y_diag, = ax.plot(1.41*np.diagonal(y_coords.T, offset=y_offset),
                          np.diagonal(eta.T, offset=y_offset),
                          'kx:', label='x = y')
        ax.axis(profile_limits)
        ax.set_title(label + ' along diagonal')
        ax.legend()

        return radial, x_axis, y_axis, x_diag, y_diag

    def _update_profile_row(self, eta, radial, x_axis, y_axis, x_diag, y_diag):
        """Replaces the y data of one row of profile plots, using the same
        rows, columns and diagonals that were plotted in __init__"""
        radial.set_ydata(eta.ravel())

        x_axis.set_ydata(eta[self._mid_row(),:])
        y_axis.set_ydata(eta[:,self._mid_col()])

        x_diag.set_ydata(np.diagonal(eta, offset=self._x_diag_offset()))
        y_diag.set_ydata(np.diagonal(eta.T, offset=self._y_diag_offset()))

    def plot(self, eta1, u1, v1, eta2=None, u2=None, v2=None):
        """
        Updates the cached artists with new data and redraws the figure.
        eta2 may only be given if the helper was created with eta2; u2 and
        v2 are accepted but not plotted.
        """
        #The artists are updated directly; no subplot is (re)added here, as
        #that may create new empty axes on top of the existing ones
        self.sp_eta.set_data(eta1)
        self.sp_u.set_data(u1)
        self.sp_v.set_data(v1)

        self._update_profile_row(eta1, self.sp_radial1,
                                 self.sp_x_axis1, self.sp_y_axis1,
                                 self.sp_x_diag1, self.sp_y_diag1)

        if (eta2 is not None):
            self._update_profile_row(eta2, self.sp_radial2,
                                     self.sp_x_axis2, self.sp_y_axis2,
                                     self.sp_x_diag2, self.sp_y_diag2)

        #Redraw our own figure, whether or not it is pyplot's current one
        self.fig.canvas.draw_idle()
        time.sleep(0.001)
|
||||
|
||||
|
||||
144
SWESimulators/WAF.py
Normal file
144
SWESimulators/WAF.py
Normal file
@@ -0,0 +1,144 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the Weighted average flux (WAF) described in
|
||||
E. Toro, Shock-Capturing methods for free-surface shallow flows, 2001
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
import pyopencl as cl #OpenCL in Python
|
||||
import Common
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"""
|
||||
Class that solves the SW equations using the Weighted Average Flux (WAF) scheme
|
||||
"""
|
||||
class WAF:
    """
    Shallow water simulator that advances the solution with the swe_2D
    kernel from WAF_kernel.opencl, using dimensional splitting.
    """

    def __init__(self,
                 cl_ctx,
                 h0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g,
                 block_width=16, block_height=16):
        """
        Initialization routine
        cl_ctx: OpenCL context used for the command queue, kernel and data
        h0: Water depth incl ghost cells (the original note says
            (nx+1)*(ny+1) cells; two ghost cells are requested per direction
            below -- TODO confirm the expected shape against
            Common.SWEDataArkawaA)
        hu0: Initial momentum along x-axis incl ghost cells
        hv0: Initial momentum along y-axis incl ghost cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        block_width, block_height: OpenCL work-group size
        """
        self.cl_ctx = cl_ctx

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.kernel = Common.get_kernel(self.cl_ctx, "WAF_kernel.opencl", block_width, block_height)

        #Create data by uploading to device
        ghost_cells_x = 2
        ghost_cells_y = 2
        self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters: round the domain size up to a
        #whole number of work-groups in each direction
        self.local_size = (block_width, block_height)
        self.global_size = (
                int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]),
                int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1])
        )

    def step(self, t_end=0.0):
        """
        Advances the simulation by t_end and returns the accumulated
        simulation time self.t. Timesteps are run in pairs (x-then-y
        followed by y-then-x), each of size at most self.dt.
        """
        n = int(t_end / (2.0*self.dt) + 1)

        for i in range(0, n):
            #Dimensional splitting: second order accurate for every other timestep,
            #thus run two timesteps in a go. Each pair covers up to 2*dt of
            #the remaining time; local_dt is the size of one of the two.
            local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
            if (local_dt <= 0.0):
                break

            #Along X, then Y
            self._substep(local_dt, 0)

            #Along Y, then X
            self._substep(local_dt, 1)

        return self.t

    def _substep(self, local_dt, ordering):
        """
        Runs the kernel once with timestep local_dt (ordering 0: x then y,
        1: y then x), swaps input and output buffers and advances self.t
        by local_dt, so that a pair of substeps advances time by 2*local_dt
        """
        self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size,
                self.nx, self.ny,
                self.dx, self.dy, local_dt,
                self.g,
                np.int32(ordering),
                self.cl_data.h0.data, self.cl_data.h0.pitch,
                self.cl_data.hu0.data, self.cl_data.hu0.pitch,
                self.cl_data.hv0.data, self.cl_data.hv0.pitch,
                self.cl_data.h1.data, self.cl_data.h1.pitch,
                self.cl_data.hu1.data, self.cl_data.hu1.pitch,
                self.cl_data.hv1.data, self.cl_data.hv1.pitch)
        self.cl_data.swap()

        self.t += local_dt

    def download(self):
        """
        Returns whatever self.cl_data.download yields for our command queue
        """
        return self.cl_data.download(self.cl_queue)
|
||||
|
||||
191
SWESimulators/WAF_kernel.opencl
Normal file
191
SWESimulators/WAF_kernel.opencl
Normal file
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
This OpenCL kernel implements the Weighted Average Flux (WAF) numerical
|
||||
scheme for the shallow water equations, described in
|
||||
E. Toro, Shock-Capturing methods for free-surface shallow flows, 2001
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#include "common.opencl"
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Computes the flux along the x axis for all faces
|
||||
*/
|
||||
void computeFluxF(__local float Q[3][block_height+4][block_width+4],
                  __local float F[3][block_height+1][block_width+1],
                  const float g_, const float dx_, const float dt_) {
    //Each work-item starts at its local id and strides by the work-group size
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    for (int row = first_row; row < block_height; row += row_stride) {
        //Rows of Q are offset by two relative to F (skip ghost cells)
        const int q_row = row + 2;

        for (int face = first_col; face < block_width+1; face += col_stride) {
            //Q column of the cell directly left of this face
            const int q_col = face + 1;

            //Four-cell stencil: two cells on each side of the face
            const float3 Ql2 = (float3)(Q[0][q_row][q_col-1], Q[1][q_row][q_col-1], Q[2][q_row][q_col-1]);
            const float3 Ql1 = (float3)(Q[0][q_row][q_col  ], Q[1][q_row][q_col  ], Q[2][q_row][q_col  ]);
            const float3 Qr1 = (float3)(Q[0][q_row][q_col+1], Q[1][q_row][q_col+1], Q[2][q_row][q_col+1]);
            const float3 Qr2 = (float3)(Q[0][q_row][q_col+2], Q[1][q_row][q_col+2], Q[2][q_row][q_col+2]);

            //Store the three flux components for this face
            const float3 flux = WAF_1D_flux(Ql2, Ql1, Qr1, Qr2, g_, dx_, dt_);
            F[0][row][face] = flux.x;
            F[1][row][face] = flux.y;
            F[2][row][face] = flux.z;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Computes the flux along the y axis for all faces
|
||||
*/
|
||||
void computeFluxG(__local float Q[3][block_height+4][block_width+4],
                  __local float G[3][block_height+1][block_width+1],
                  const float g_, const float dy_, const float dt_) {
    //Each work-item starts at its local id and strides by the work-group size
    const int first_col = get_local_id(0);
    const int first_row = get_local_id(1);
    const int col_stride = get_local_size(0);
    const int row_stride = get_local_size(1);

    for (int face = first_row; face < block_height+1; face += row_stride) {
        //Q row of the cell directly below this face
        const int q_row = face + 1;

        for (int col = first_col; col < block_width; col += col_stride) {
            //Columns of Q are offset by two relative to G (skip ghost cells)
            const int q_col = col + 2;

            //Four-cell stencil: two cells on each side of the face.
            //hu and hv are swapped so that the 1D flux function can be
            //used for the y direction
            const float3 Ql2 = (float3)(Q[0][q_row-1][q_col], Q[2][q_row-1][q_col], Q[1][q_row-1][q_col]);
            const float3 Ql1 = (float3)(Q[0][q_row  ][q_col], Q[2][q_row  ][q_col], Q[1][q_row  ][q_col]);
            const float3 Qr1 = (float3)(Q[0][q_row+1][q_col], Q[2][q_row+1][q_col], Q[1][q_row+1][q_col]);
            const float3 Qr2 = (float3)(Q[0][q_row+2][q_col], Q[2][q_row+2][q_col], Q[1][q_row+2][q_col]);

            //Undo the swap when storing: flux.z belongs to hu, flux.y to hv
            const float3 flux = WAF_1D_flux(Ql2, Ql1, Qr1, Qr2, g_, dy_, dt_);
            G[0][face][col] = flux.x;
            G[1][face][col] = flux.z;
            G[2][face][col] = flux.y;
        }
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
__kernel void swe_2D(
|
||||
int nx_, int ny_,
|
||||
float dx_, float dy_, float dt_,
|
||||
float g_, int step_,
|
||||
|
||||
//Input h^n
|
||||
__global float* h0_ptr_, int h0_pitch_,
|
||||
__global float* hu0_ptr_, int hu0_pitch_,
|
||||
__global float* hv0_ptr_, int hv0_pitch_,
|
||||
|
||||
//Output h^{n+1}
|
||||
__global float* h1_ptr_, int h1_pitch_,
|
||||
__global float* hu1_ptr_, int hu1_pitch_,
|
||||
__global float* hv1_ptr_, int hv1_pitch_) {
|
||||
//Shared memory variables
|
||||
__local float Q[3][block_height+4][block_width+4];
|
||||
__local float F[3][block_height+1][block_width+1];
|
||||
|
||||
|
||||
|
||||
//Read into shared memory Q from global memory
|
||||
readBlock2(h0_ptr_, h0_pitch_,
|
||||
hu0_ptr_, hu0_pitch_,
|
||||
hv0_ptr_, hv0_pitch_,
|
||||
Q, nx_, ny_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
|
||||
//Set boundary conditions
|
||||
noFlowBoundary2(Q, nx_, ny_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
|
||||
|
||||
//Step 0 => evolve x first, then y
|
||||
if (step_ == 0) {
|
||||
//Compute fluxes along the x axis and evolve
|
||||
computeFluxF(Q, F, g_, dx_, dt_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//Fix boundary conditions
|
||||
noFlowBoundary2(Q, nx_, ny_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//Compute fluxes along the y axis and evolve
|
||||
computeFluxG(Q, F, g_, dy_, dt_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
//Step 1 => evolve y first, then x
|
||||
else {
|
||||
//Compute fluxes along the y axis and evolve
|
||||
computeFluxG(Q, F, g_, dy_, dt_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//Fix boundary conditions
|
||||
noFlowBoundary2(Q, nx_, ny_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//Compute fluxes along the x axis and evolve
|
||||
computeFluxF(Q, F, g_, dx_, dt_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Write to main memory for all internal cells
|
||||
writeBlock2(h1_ptr_, h1_pitch_,
|
||||
hu1_ptr_, hu1_pitch_,
|
||||
hv1_ptr_, hv1_pitch_,
|
||||
Q, nx_, ny_);
|
||||
}
|
||||
5
SWESimulators/__init__.py
Normal file
5
SWESimulators/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
#!/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
# Nothing general to do
|
||||
972
SWESimulators/common.opencl
Normal file
972
SWESimulators/common.opencl
Normal file
@@ -0,0 +1,972 @@
|
||||
/*
|
||||
This OpenCL kernel implements the Kurganov-Petrova numerical scheme
|
||||
for the shallow water equations, described in
|
||||
A. Kurganov & Guergana Petrova
|
||||
A Second-Order Well-Balanced Positivity Preserving Central-Upwind
|
||||
Scheme for the Saint-Venant System Communications in Mathematical
|
||||
Sciences, 5 (2007), 133-160.
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Reads a block of data with one ghost cell for the shallow water equations
|
||||
*/
|
||||
void readBlock1(__global float* h_ptr_, int h_pitch_,
|
||||
__global float* hu_ptr_, int hu_pitch_,
|
||||
__global float* hv_ptr_, int hv_pitch_,
|
||||
__local float Q[3][block_height+2][block_width+2],
|
||||
const int nx_, const int ny_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Index of block within domain
|
||||
const int bx = get_local_size(0) * get_group_id(0);
|
||||
const int by = get_local_size(1) * get_group_id(1);
|
||||
|
||||
//Read into shared memory
|
||||
for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
|
||||
const int l = clamp(by + j, 0, ny_+1); // Out of bounds
|
||||
|
||||
//Compute the pointer to current row in the arrays
|
||||
__global float* const h_row = (__global float*) ((__global char*) h_ptr_ + h_pitch_*l);
|
||||
__global float* const hu_row = (__global float*) ((__global char*) hu_ptr_ + hu_pitch_*l);
|
||||
__global float* const hv_row = (__global float*) ((__global char*) hv_ptr_ + hv_pitch_*l);
|
||||
|
||||
for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
|
||||
const int k = clamp(bx + i, 0, nx_+1); // Out of bounds
|
||||
|
||||
Q[0][j][i] = h_row[k];
|
||||
Q[1][j][i] = hu_row[k];
|
||||
Q[2][j][i] = hv_row[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Reads a block of data with two ghost cells for the shallow water equations
|
||||
*/
|
||||
void readBlock2(__global float* h_ptr_, int h_pitch_,
|
||||
__global float* hu_ptr_, int hu_pitch_,
|
||||
__global float* hv_ptr_, int hv_pitch_,
|
||||
__local float Q[3][block_height+4][block_width+4],
|
||||
const int nx_, const int ny_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Index of block within domain
|
||||
const int bx = get_local_size(0) * get_group_id(0);
|
||||
const int by = get_local_size(1) * get_group_id(1);
|
||||
|
||||
//Read into shared memory
|
||||
for (int j=ty; j<block_height+4; j+=get_local_size(1)) {
|
||||
const int l = clamp(by + j, 0, ny_+3); // Out of bounds
|
||||
|
||||
//Compute the pointer to current row in the arrays
|
||||
__global float* const h_row = (__global float*) ((__global char*) h_ptr_ + h_pitch_*l);
|
||||
__global float* const hu_row = (__global float*) ((__global char*) hu_ptr_ + hu_pitch_*l);
|
||||
__global float* const hv_row = (__global float*) ((__global char*) hv_ptr_ + hv_pitch_*l);
|
||||
|
||||
for (int i=tx; i<block_width+4; i+=get_local_size(0)) {
|
||||
const int k = clamp(bx + i, 0, nx_+3); // Out of bounds
|
||||
|
||||
Q[0][j][i] = h_row[k];
|
||||
Q[1][j][i] = hu_row[k];
|
||||
Q[2][j][i] = hv_row[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Writes a block of data to global memory for the shallow water equations.
|
||||
*/
|
||||
void writeBlock1(__global float* h_ptr_, int h_pitch_,
|
||||
__global float* hu_ptr_, int hu_pitch_,
|
||||
__global float* hv_ptr_, int hv_pitch_,
|
||||
__local float Q[3][block_height+2][block_width+2],
|
||||
const int nx_, const int ny_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Index of cell within domain
|
||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
||||
const int tj = get_global_id(1) + 1;
|
||||
|
||||
//Only write internal cells
|
||||
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1) {
|
||||
const int i = tx + 1; //Skip local ghost cells, i.e., +1
|
||||
const int j = ty + 1;
|
||||
|
||||
__global float* const h_row = (__global float*) ((__global char*) h_ptr_ + h_pitch_*tj);
|
||||
__global float* const hu_row = (__global float*) ((__global char*) hu_ptr_ + hu_pitch_*tj);
|
||||
__global float* const hv_row = (__global float*) ((__global char*) hv_ptr_ + hv_pitch_*tj);
|
||||
|
||||
h_row[ti] = Q[0][j][i];
|
||||
hu_row[ti] = Q[1][j][i];
|
||||
hv_row[ti] = Q[2][j][i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Writes a block of data to global memory for the shallow water equations.
|
||||
*/
|
||||
void writeBlock2(__global float* h_ptr_, int h_pitch_,
|
||||
__global float* hu_ptr_, int hu_pitch_,
|
||||
__global float* hv_ptr_, int hv_pitch_,
|
||||
__local float Q[3][block_height+4][block_width+4],
|
||||
const int nx_, const int ny_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Index of cell within domain
|
||||
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
||||
const int tj = get_global_id(1) + 2;
|
||||
|
||||
//Only write internal cells
|
||||
if (ti > 1 && ti < nx_+2 && tj > 1 && tj < ny_+2) {
|
||||
const int i = tx + 2; //Skip local ghost cells, i.e., +2
|
||||
const int j = ty + 2;
|
||||
|
||||
__global float* const h_row = (__global float*) ((__global char*) h_ptr_ + h_pitch_*tj);
|
||||
__global float* const hu_row = (__global float*) ((__global char*) hu_ptr_ + hu_pitch_*tj);
|
||||
__global float* const hv_row = (__global float*) ((__global char*) hv_ptr_ + hv_pitch_*tj);
|
||||
|
||||
h_row[ti] = Q[0][j][i];
|
||||
hu_row[ti] = Q[1][j][i];
|
||||
hv_row[ti] = Q[2][j][i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* No flow boundary conditions for the shallow water equations
|
||||
* with one ghost cell in each direction
|
||||
*/
|
||||
void noFlowBoundary1(__local float Q[3][block_height+2][block_width+2], const int nx_, const int ny_) {
|
||||
//Global index
|
||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
||||
const int tj = get_global_id(1) + 1;
|
||||
|
||||
//Block-local indices
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
const int i = tx + 1; //Skip local ghost cells, i.e., +1
|
||||
const int j = ty + 1;
|
||||
|
||||
//Fix boundary conditions
|
||||
if (ti == 1) {
|
||||
Q[0][j][i-1] = Q[0][j][i];
|
||||
Q[1][j][i-1] = -Q[1][j][i];
|
||||
Q[2][j][i-1] = Q[2][j][i];
|
||||
}
|
||||
if (ti == nx_) {
|
||||
Q[0][j][i+1] = Q[0][j][i];
|
||||
Q[1][j][i+1] = -Q[1][j][i];
|
||||
Q[2][j][i+1] = Q[2][j][i];
|
||||
}
|
||||
if (tj == 1) {
|
||||
Q[0][j-1][i] = Q[0][j][i];
|
||||
Q[1][j-1][i] = Q[1][j][i];
|
||||
Q[2][j-1][i] = -Q[2][j][i];
|
||||
}
|
||||
if (tj == ny_) {
|
||||
Q[0][j+1][i] = Q[0][j][i];
|
||||
Q[1][j+1][i] = Q[1][j][i];
|
||||
Q[2][j+1][i] = -Q[2][j][i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* No flow boundary conditions for the shallow water equations
|
||||
* with two ghost cells in each direction
|
||||
*/
|
||||
void noFlowBoundary2(__local float Q[3][block_height+4][block_width+4], const int nx_, const int ny_) {
|
||||
//Global index
|
||||
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
||||
const int tj = get_global_id(1) + 2;
|
||||
|
||||
//Block-local indices
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
const int i = tx + 2; //Skip local ghost cells, i.e., +2
|
||||
const int j = ty + 2;
|
||||
|
||||
if (ti == 2) {
|
||||
Q[0][j][i-1] = Q[0][j][i];
|
||||
Q[1][j][i-1] = -Q[1][j][i];
|
||||
Q[2][j][i-1] = Q[2][j][i];
|
||||
|
||||
Q[0][j][i-2] = Q[0][j][i+1];
|
||||
Q[1][j][i-2] = -Q[1][j][i+1];
|
||||
Q[2][j][i-2] = Q[2][j][i+1];
|
||||
}
|
||||
if (ti == nx_+1) {
|
||||
Q[0][j][i+1] = Q[0][j][i];
|
||||
Q[1][j][i+1] = -Q[1][j][i];
|
||||
Q[2][j][i+1] = Q[2][j][i];
|
||||
|
||||
Q[0][j][i+2] = Q[0][j][i-1];
|
||||
Q[1][j][i+2] = -Q[1][j][i-1];
|
||||
Q[2][j][i+2] = Q[2][j][i-1];
|
||||
}
|
||||
if (tj == 2) {
|
||||
Q[0][j-1][i] = Q[0][j][i];
|
||||
Q[1][j-1][i] = Q[1][j][i];
|
||||
Q[2][j-1][i] = -Q[2][j][i];
|
||||
|
||||
Q[0][j-2][i] = Q[0][j+1][i];
|
||||
Q[1][j-2][i] = Q[1][j+1][i];
|
||||
Q[2][j-2][i] = -Q[2][j+1][i];
|
||||
}
|
||||
if (tj == ny_+1) {
|
||||
Q[0][j+1][i] = Q[0][j][i];
|
||||
Q[1][j+1][i] = Q[1][j][i];
|
||||
Q[2][j+1][i] = -Q[2][j][i];
|
||||
|
||||
Q[0][j+2][i] = Q[0][j-1][i];
|
||||
Q[1][j+2][i] = Q[1][j-1][i];
|
||||
Q[2][j+2][i] = -Q[2][j-1][i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Evolves the solution in time along the x axis (dimensional splitting)
|
||||
*/
|
||||
void evolveF1(__local float Q[3][block_height+2][block_width+2],
|
||||
__local float F[3][block_height+1][block_width+1],
|
||||
const int nx_, const int ny_,
|
||||
const float dx_, const float dt_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Index of cell within domain
|
||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
||||
const int tj = get_global_id(1) + 1;
|
||||
|
||||
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1) {
|
||||
const int i = tx + 1; //Skip local ghost cells, i.e., +1
|
||||
const int j = ty + 1;
|
||||
|
||||
Q[0][j][i] = Q[0][j][i] + (F[0][ty][tx] - F[0][ty][tx+1]) * dt_ / dx_;
|
||||
Q[1][j][i] = Q[1][j][i] + (F[1][ty][tx] - F[1][ty][tx+1]) * dt_ / dx_;
|
||||
Q[2][j][i] = Q[2][j][i] + (F[2][ty][tx] - F[2][ty][tx+1]) * dt_ / dx_;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Evolves the solution in time along the x axis (dimensional splitting)
|
||||
*/
|
||||
void evolveF2(__local float Q[3][block_height+4][block_width+4],
|
||||
__local float F[3][block_height+1][block_width+1],
|
||||
const int nx_, const int ny_,
|
||||
const float dx_, const float dt_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Index of cell within domain
|
||||
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
||||
const int tj = get_global_id(1) + 2;
|
||||
|
||||
if (ti > 1 && ti < nx_+2 && tj > 1 && tj < ny_+2) {
|
||||
const int i = tx + 2; //Skip local ghost cells, i.e., +1
|
||||
const int j = ty + 2;
|
||||
|
||||
Q[0][j][i] = Q[0][j][i] + (F[0][ty][tx] - F[0][ty][tx+1]) * dt_ / dx_;
|
||||
Q[1][j][i] = Q[1][j][i] + (F[1][ty][tx] - F[1][ty][tx+1]) * dt_ / dx_;
|
||||
Q[2][j][i] = Q[2][j][i] + (F[2][ty][tx] - F[2][ty][tx+1]) * dt_ / dx_;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Evolves the solution in time along the y axis (dimensional splitting)
|
||||
*/
|
||||
void evolveG1(__local float Q[3][block_height+2][block_width+2],
|
||||
__local float G[3][block_height+1][block_width+1],
|
||||
const int nx_, const int ny_,
|
||||
const float dy_, const float dt_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Index of cell within domain
|
||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
||||
const int tj = get_global_id(1) + 1;
|
||||
|
||||
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1) {
|
||||
const int i = tx + 1; //Skip local ghost cells, i.e., +1
|
||||
const int j = ty + 1;
|
||||
|
||||
Q[0][j][i] = Q[0][j][i] + (G[0][ty][tx] - G[0][ty+1][tx]) * dt_ / dy_;
|
||||
Q[1][j][i] = Q[1][j][i] + (G[1][ty][tx] - G[1][ty+1][tx]) * dt_ / dy_;
|
||||
Q[2][j][i] = Q[2][j][i] + (G[2][ty][tx] - G[2][ty+1][tx]) * dt_ / dy_;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Evolves the solution in time along the y axis (dimensional splitting)
|
||||
*/
|
||||
void evolveG2(__local float Q[3][block_height+4][block_width+4],
|
||||
__local float G[3][block_height+1][block_width+1],
|
||||
const int nx_, const int ny_,
|
||||
const float dy_, const float dt_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Index of cell within domain
|
||||
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
||||
const int tj = get_global_id(1) + 2;
|
||||
|
||||
if (ti > 1 && ti < nx_+2 && tj > 1 && tj < ny_+2) {
|
||||
const int i = tx + 2; //Skip local ghost cells, i.e., +2
|
||||
const int j = ty + 2;
|
||||
|
||||
Q[0][j][i] = Q[0][j][i] + (G[0][ty][tx] - G[0][ty+1][tx]) * dt_ / dy_;
|
||||
Q[1][j][i] = Q[1][j][i] + (G[1][ty][tx] - G[1][ty+1][tx]) * dt_ / dy_;
|
||||
Q[2][j][i] = Q[2][j][i] + (G[2][ty][tx] - G[2][ty+1][tx]) * dt_ / dy_;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Reconstructs a slope using the minmod limiter based on three
 * consecutive values.
 *
 * Three candidate differences are formed: backward and forward (both scaled
 * by theta) and central (halved). The result has the magnitude of the
 * smallest candidate and the common sign of the three; it is zero whenever
 * the candidates do not all share the same sign.
 */
float minmodSlope(float left, float center, float right, float theta) {
    // Candidate differences
    const float d_back = (center - left) * theta;
    const float d_mid  = (right - left) * 0.5f;
    const float d_fwd  = (right - center) * theta;

    // Unit-magnitude signs of the candidates
    const float s_back = copysign(1.0f, d_back);
    const float s_mid  = copysign(1.0f, d_mid);
    const float s_fwd  = copysign(1.0f, d_fwd);

    // Smallest magnitude among the candidates
    const float smallest = min( min(fabs(d_back), fabs(d_mid)), fabs(d_fwd) );

    // (s_back + s_mid) and (s_mid + s_fwd) are each 0 or +-2, so the leading
    // factors multiply to +-1 when all signs agree and to 0 otherwise
    return 0.25f
        *s_back
        *(s_back + s_mid)
        *(s_mid + s_fwd)
        *smallest;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Reconstructs a minmod slope for a whole block along x
|
||||
*/
|
||||
void minmodSlopeX(__local float Q[3][block_height+4][block_width+4],
|
||||
__local float Qx[3][block_height+2][block_width+2],
|
||||
const float theta_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
//Reconstruct slopes along x axis
|
||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
||||
const int l = j + 2; //Skip ghost cells
|
||||
for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
|
||||
const int k = i + 1;
|
||||
for (int p=0; p<3; ++p) {
|
||||
Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Reconstructs a minmod slope for a whole block along y
|
||||
*/
|
||||
void minmodSlopeY(__local float Q[3][block_height+4][block_width+4],
|
||||
__local float Qy[3][block_height+2][block_width+2],
|
||||
const float theta_) {
|
||||
//Index of thread within block
|
||||
const int tx = get_local_id(0);
|
||||
const int ty = get_local_id(1);
|
||||
|
||||
for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
|
||||
const int l = j + 1;
|
||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
||||
const int k = i + 2; //Skip ghost cells
|
||||
for (int p=0; p<3; ++p) {
|
||||
Qy[p][j][i] = minmodSlope(Q[p][l-1][k], Q[p][l][k], Q[p][l+1][k], theta_);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Returns the x component of the wind stress term for this work-item.
 *
 * Positions are derived from get_global_id() and the grid spacing.
 * wind_stress_type_ selects the forcing:
 *   0 (UNIFORM_ALONGSHORE):     tau0_/rho_ * exp(-alpha_*y)
 *   1 (BELL_SHAPED_ALONGSHORE): as type 0, additionally multiplied by
 *                               exp(-(alpha_*(x-xm_))^2), and only while
 *                               t_ <= 48.0f*3600.0f
 *   2 (MOVING_CYCLONE):         -(tau0_/rho_) * (b/Rc_) * exp(-0.5*(1-r/Rc_)^2),
 *                               with (a, b) the offset from the centre
 *                               (x0_ + u0_*(t_+dt_), y0_ + v0_*(t_+dt_)) and
 *                               r = sqrt(a^2 + b^2)
 * Any other type, and type 1 outside its time window, yields 0.
 */
float windStressX(int wind_stress_type_,
                  float dx_, float dy_, float dt_,
                  float tau0_, float rho_, float alpha_, float xm_, float Rc_,
                  float x0_, float y0_,
                  float u0_, float v0_,
                  float t_) {
    //UNIFORM_ALONGSHORE
    if (wind_stress_type_ == 0) {
        const float pos_y = (get_global_id(1)+0.5f)*dy_;
        return tau0_/rho_ * exp(-alpha_*pos_y);
    }

    //BELL_SHAPED_ALONGSHORE
    if (wind_stress_type_ == 1) {
        if (t_ <= 48.0f*3600.0f) {
            const float arg = alpha_*((get_global_id(0)+0.5f)*dx_-xm_);
            const float arg_sq = arg*arg;
            const float pos_y = (get_global_id(1)+0.5f)*dy_;
            return tau0_/rho_ * exp(-arg_sq) * exp(-alpha_*pos_y);
        }
        return 0.0f;
    }

    //MOVING_CYCLONE
    if (wind_stress_type_ == 2) {
        // x is taken without the half-cell offset, y with it
        const float pos_x = (get_global_id(0))*dx_;
        const float pos_y = (get_global_id(1)+0.5f)*dy_;

        // Offset from the centre position at time t_+dt_
        const float off_x = (pos_x-x0_-u0_*(t_+dt_));
        const float off_x_sq = off_x*off_x;
        const float off_y = (pos_y-y0_-v0_*(t_+dt_));
        const float off_y_sq = off_y*off_y;

        const float dist = sqrt(off_x_sq+off_y_sq);
        const float rel = 1.0f - dist/Rc_;
        const float rel_sq = rel*rel;

        return -(tau0_/rho_) * (off_y/Rc_) * exp(-0.5f*rel_sq);
    }

    // Unknown type: no forcing
    return 0.0f;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Returns the y component of the wind stress term for this work-item.
 *
 * Only wind_stress_type_ == 2 (MOVING_CYCLONE) produces a non-zero value:
 *   (tau0_/rho_) * (a/Rc_) * exp(-0.5*(1-r/Rc_)^2),
 * with (a, b) the offset of this work-item's position from the centre
 * (x0_ + u0_*(t_+dt_), y0_ + v0_*(t_+dt_)) and r = sqrt(a^2 + b^2).
 * Every other type yields 0.
 */
float windStressY(int wind_stress_type_,
                  float dx_, float dy_, float dt_,
                  float tau0_, float rho_, float alpha_, float xm_, float Rc_,
                  float x0_, float y0_,
                  float u0_, float v0_,
                  float t_) {
    // Every type except MOVING_CYCLONE has no y forcing
    if (wind_stress_type_ != 2) {
        return 0.0f;
    }

    //MOVING_CYCLONE
    // x is taken with the half-cell offset, y without it
    const float pos_x = (get_global_id(0)+0.5f)*dx_;
    const float pos_y = (get_global_id(1))*dy_;

    // Offset from the centre position at time t_+dt_
    const float off_x = (pos_x-x0_-u0_*(t_+dt_));
    const float off_x_sq = off_x*off_x;
    const float off_y = (pos_y-y0_-v0_*(t_+dt_));
    const float off_y_sq = off_y*off_y;

    const float dist = sqrt(off_x_sq+off_y_sq);
    const float rel = 1.0f - dist/Rc_;
    const float rel_sq = rel*rel;

    return (tau0_/rho_) * (off_x/Rc_) * exp(-0.5f*rel_sq);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
float3 F_func(const float3 Q, const float g) {
|
||||
float3 F;
|
||||
|
||||
F.x = Q.y; //hu
|
||||
F.y = Q.y*Q.y / Q.x + 0.5f*g*Q.x*Q.x; //hu*hu/h + 0.5f*g*h*h;
|
||||
F.z = Q.y*Q.z / Q.x; //hu*hv/h;
|
||||
|
||||
return F;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Central upwind flux function
|
||||
*/
|
||||
float3 CentralUpwindFlux(const float3 Qm, float3 Qp, const float g) {
|
||||
const float3 Fp = F_func(Qp, g);
|
||||
const float up = Qp.y / Qp.x; // hu / h
|
||||
const float cp = sqrt(g*Qp.x); // sqrt(g*h)
|
||||
|
||||
const float3 Fm = F_func(Qm, g);
|
||||
const float um = Qm.y / Qm.x; // hu / h
|
||||
const float cm = sqrt(g*Qm.x); // sqrt(g*h)
|
||||
|
||||
const float am = min(min(um-cm, up-cp), 0.0f); // largest negative wave speed
|
||||
const float ap = max(max(um+cm, up+cp), 0.0f); // largest positive wave speed
|
||||
|
||||
return ((ap*Fm - am*Fp) + ap*am*(Qp-Qm))/(ap-am);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Harten-Lax-van Leer with contact discontinuity (Toro 2001, p 180)
|
||||
*/
|
||||
float3 HLL_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
||||
const float h_l = Q_l.x;
|
||||
const float h_r = Q_r.x;
|
||||
|
||||
// Calculate velocities
|
||||
const float u_l = Q_l.y / h_l;
|
||||
const float u_r = Q_r.y / h_r;
|
||||
|
||||
// Estimate the potential wave speeds
|
||||
const float c_l = sqrt(g_*h_l);
|
||||
const float c_r = sqrt(g_*h_r);
|
||||
|
||||
// Compute h in the "star region", h^dagger
|
||||
const float h_dag = 0.5f * (h_l+h_r) - 0.25f * (u_r-u_l)*(h_l+h_r)/(c_l+c_r);
|
||||
|
||||
const float q_l_tmp = sqrt(0.5f * ( (h_dag+h_l)*h_dag / (h_l*h_l) ) );
|
||||
const float q_r_tmp = sqrt(0.5f * ( (h_dag+h_r)*h_dag / (h_r*h_r) ) );
|
||||
|
||||
const float q_l = (h_dag > h_l) ? q_l_tmp : 1.0f;
|
||||
const float q_r = (h_dag > h_r) ? q_r_tmp : 1.0f;
|
||||
|
||||
// Compute wave speed estimates
|
||||
const float S_l = u_l - c_l*q_l;
|
||||
const float S_r = u_r + c_r*q_r;
|
||||
|
||||
//Upwind selection
|
||||
if (S_l >= 0.0f) {
|
||||
return F_func(Q_l, g_);
|
||||
}
|
||||
else if (S_r <= 0.0f) {
|
||||
return F_func(Q_r, g_);
|
||||
}
|
||||
//Or estimate flux in the star region
|
||||
else {
|
||||
const float3 F_l = F_func(Q_l, g_);
|
||||
const float3 F_r = F_func(Q_r, g_);
|
||||
const float3 flux = (S_r*F_l - S_l*F_r + S_r*S_l*(Q_r - Q_l)) / (S_r-S_l);
|
||||
return flux;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Harten-Lax-van Leer with contact discontinuity (Toro 2001, p 181)
|
||||
*/
|
||||
float3 HLLC_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
||||
const float h_l = Q_l.x;
|
||||
const float h_r = Q_r.x;
|
||||
|
||||
// Calculate velocities
|
||||
const float u_l = Q_l.y / h_l;
|
||||
const float u_r = Q_r.y / h_r;
|
||||
|
||||
// Estimate the potential wave speeds
|
||||
const float c_l = sqrt(g_*h_l);
|
||||
const float c_r = sqrt(g_*h_r);
|
||||
|
||||
// Compute h in the "star region", h^dagger
|
||||
const float h_dag = 0.5f * (h_l+h_r) - 0.25f * (u_r-u_l)*(h_l+h_r)/(c_l+c_r);
|
||||
|
||||
const float q_l_tmp = sqrt(0.5f * ( (h_dag+h_l)*h_dag / (h_l*h_l) ) );
|
||||
const float q_r_tmp = sqrt(0.5f * ( (h_dag+h_r)*h_dag / (h_r*h_r) ) );
|
||||
|
||||
const float q_l = (h_dag > h_l) ? q_l_tmp : 1.0f;
|
||||
const float q_r = (h_dag > h_r) ? q_r_tmp : 1.0f;
|
||||
|
||||
// Compute wave speed estimates
|
||||
const float S_l = u_l - c_l*q_l;
|
||||
const float S_r = u_r + c_r*q_r;
|
||||
const float S_star = ( S_l*h_r*(u_r - S_r) - S_r*h_l*(u_l - S_l) ) / ( h_r*(u_r - S_r) - h_l*(u_l - S_l) );
|
||||
|
||||
const float3 F_l = F_func(Q_l, g_);
|
||||
const float3 F_r = F_func(Q_r, g_);
|
||||
|
||||
//Upwind selection
|
||||
if (S_l >= 0.0f) {
|
||||
return F_l;
|
||||
}
|
||||
else if (S_r <= 0.0f) {
|
||||
return F_r;
|
||||
}
|
||||
//Or estimate flux in the "left star" region
|
||||
else if (S_l <= 0.0f && 0.0f <=S_star) {
|
||||
const float v_l = Q_l.z / h_l;
|
||||
const float3 Q_star_l = h_l * (S_l - u_l) / (S_l - S_star) * (float3)(1, S_star, v_l);
|
||||
const float3 flux = F_l + S_l*(Q_star_l - Q_l);
|
||||
return flux;
|
||||
}
|
||||
//Or estimate flux in the "righ star" region
|
||||
else if (S_star <= 0.0f && 0.0f <=S_r) {
|
||||
const float v_r = Q_r.z / h_r;
|
||||
const float3 Q_star_r = h_r * (S_r - u_r) / (S_r - S_star) * (float3)(1, S_star, v_r);
|
||||
const float3 flux = F_r + S_r*(Q_star_r - Q_r);
|
||||
return flux;
|
||||
}
|
||||
else {
|
||||
return -99999.9f; //Something wrong here
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Superbee flux limiter for WAF.
 * Related to superbee limiter so that WAF_superbee(r, c) = 1 - (1-|c|)*superbee(r)
 *
 * Piecewise in r_:
 *   r <= 0        -> 1
 *   0 < r <= 1/2  -> 1 - 2*(1-|c|)*r
 *   1/2 < r <= 1  -> |c|
 *   1 < r <= 2    -> 1 - (1-|c|)*r
 *   r > 2         -> 2*|c| - 1
 *
 * @param r_ the ratio of upwind change (see Toro 2001, p. 203/204)
 * @param c_ the courant number for wave k, dt*S_k/dx
 */
float WAF_superbee(float r_, float c_) {
    if (r_ <= 0.0f) {
        return 1.0f;
    }
    if (r_ <= 0.5f) {
        return 1.0f - 2.0f*(1.0f - fabs(c_))*r_;
    }
    if (r_ <= 1.0f) {
        return fabs(c_);
    }
    if (r_ <= 2.0f) {
        return 1.0f - (1.0f - fabs(c_))*r_;
    }
    return 2.0f*fabs(c_) - 1.0f;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Flux limiter for WAF named after van Albada.
 *
 * Returns 1 for r_ <= 0 and 1 - (1-|c|) * r*(1+r)/(1+r*r) otherwise.
 *
 * @param r_ the ratio of upwind change
 * @param c_ the courant number of the wave
 */
float WAF_albada(float r_, float c_) {
    return (r_ <= 0.0f)
        ? 1.0f
        : 1.0f - (1.0f - fabs(c_)) * r_ * (1.0f + r_) / (1.0f + r_*r_);
}
|
||||
|
||||
|
||||
/**
 * Minbee flux limiter for WAF.
 *
 * Piecewise in r_:
 *   r <= 0      -> 1
 *   0 < r <= 1  -> 1 - (1-|c|)*r
 *   otherwise   -> |c|
 *
 * @param r_ the ratio of upwind change
 * @param c_ the courant number of the wave
 */
float WAF_minbee(float r_, float c_) {
    if (r_ <= 0.0f) {
        return 1.0f;
    }
    if (r_ <= 1.0f) {
        return 1.0f - (1.0f - fabs(c_))*r_;
    }
    return fabs(c_);
}
|
||||
|
||||
|
||||
/**
 * Flux limiter for WAF named after minmod.
 *
 * Piecewise in r_:
 *   r <= 0      -> |c|
 *   0 < r <= 1  -> (1-r)*(1-c)
 *   r > 1       -> 1
 *
 * NOTE(review): unlike the other WAF limiters in this file, this one returns
 * |c| for r <= 0 and 1 for large r, and its middle branch uses c_ rather
 * than |c_| -- confirm this is intended before relying on it.
 *
 * @param r_ the ratio of upwind change
 * @param c_ the courant number of the wave
 */
float WAF_minmod(float r_, float c_) {
    if (r_ <= 0.0f) {
        return fabs(c_);
    }
    if (r_ <= 1.0f) {
        return (1.0f - r_) * (1.0f - c_);
    }
    return 1.0f;
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Weighted average flux (Toro 2001, p 200) for interface {i+1/2}
|
||||
* @param r_ The flux limiter parameter (see Toro 2001, p. 203)
|
||||
* @param Q_l2 Q_{i-1}
|
||||
* @param Q_l1 Q_{i}
|
||||
* @param Q_r1 Q_{i+1}
|
||||
* @param Q_r2 Q_{i+2}
|
||||
*/
|
||||
float3 WAF_1D_flux(const float3 Q_l2, const float3 Q_l1, const float3 Q_r1, const float3 Q_r2, const float g_, const float dx_, const float dt_) {
|
||||
const float h_l = Q_l1.x;
|
||||
const float h_r = Q_r1.x;
|
||||
|
||||
const float h_l2 = Q_l2.x;
|
||||
const float h_r2 = Q_r2.x;
|
||||
|
||||
// Calculate velocities
|
||||
const float u_l = Q_l1.y / h_l;
|
||||
const float u_r = Q_r1.y / h_r;
|
||||
|
||||
const float v_l = Q_l1.z / h_l;
|
||||
const float v_r = Q_r1.z / h_r;
|
||||
|
||||
const float v_l2 = Q_l2.z / h_l2;
|
||||
const float v_r2 = Q_r2.z / h_r2;
|
||||
|
||||
// Estimate the potential wave speeds
|
||||
const float c_l = sqrt(g_*h_l);
|
||||
const float c_r = sqrt(g_*h_r);
|
||||
|
||||
// Compute h in the "star region", h^dagger
|
||||
const float h_dag = 0.5f * (h_l+h_r) - 0.25f * (u_r-u_l)*(h_l+h_r)/(c_l+c_r);
|
||||
|
||||
const float q_l_tmp = sqrt(0.5f * ( (h_dag+h_l)*h_dag / (h_l*h_l) ) );
|
||||
const float q_r_tmp = sqrt(0.5f * ( (h_dag+h_r)*h_dag / (h_r*h_r) ) );
|
||||
|
||||
const float q_l = (h_dag > h_l) ? q_l_tmp : 1.0f;
|
||||
const float q_r = (h_dag > h_r) ? q_r_tmp : 1.0f;
|
||||
|
||||
// Compute wave speed estimates
|
||||
const float S_l = u_l - c_l;//*q_l;
|
||||
const float S_r = u_r + c_r;//*q_r;
|
||||
const float S_star = ( S_l*h_r*(u_r - S_r) - S_r*h_l*(u_l - S_l) ) / ( h_r*(u_r - S_r) - h_l*(u_l - S_l) );
|
||||
|
||||
const float3 Q_star_l = h_l * (S_l - u_l) / (S_l - S_star) * (float3)(1, S_star, v_l);
|
||||
const float3 Q_star_r = h_r * (S_r - u_r) / (S_r - S_star) * (float3)(1, S_star, v_r);
|
||||
|
||||
// Estimate the fluxes in the four regions
|
||||
const float3 F_1 = F_func(Q_l1, g_);
|
||||
const float3 F_4 = F_func(Q_r1, g_);
|
||||
|
||||
const float3 F_2 = F_1 + S_l*(Q_star_l - Q_l1);
|
||||
const float3 F_3 = F_4 + S_r*(Q_star_r - Q_r1);
|
||||
//const float3 F_2 = F_func(Q_star_l, g_);
|
||||
//const float3 F_3 = F_func(Q_star_r, g_);
|
||||
|
||||
// Compute the courant numbers for the waves
|
||||
const float c_1 = S_l * dt_ / dx_;
|
||||
const float c_2 = S_star * dt_ / dx_;
|
||||
const float c_3 = S_r * dt_ / dx_;
|
||||
|
||||
// Compute the "upwind change" vectors for the i-3/2 and i+3/2 interfaces
|
||||
// We use h for the tangential direction, and v for the normal direction
|
||||
const float dh = h_r - h_l;
|
||||
const float rh_m = (h_l - h_l2) / dh;
|
||||
const float rh_p = (h_r2 - h_r) / dh;
|
||||
|
||||
const float dv = v_r - v_l;
|
||||
const float rv_m = (v_l - v_l2) / dv;
|
||||
const float rv_p = (v_r2 - v_r) / dv;
|
||||
|
||||
// Compute the r parameters for the flux limiter
|
||||
// Note that you use h for h and hu, and v for hv component/equation
|
||||
const float rh_1 = (c_1 > 0.0f) ? rh_m : rh_p;
|
||||
const float rv_1 = (c_1 > 0.0f) ? rv_m : rv_p;
|
||||
|
||||
const float rh_2 = (c_2 > 0.0f) ? rh_m : rh_p;
|
||||
const float rv_2 = (c_2 > 0.0f) ? rv_m : rv_p;
|
||||
|
||||
const float rh_3 = (c_3 > 0.0f) ? rh_m : rh_p;
|
||||
const float rv_3 = (c_3 > 0.0f) ? rv_m : rv_p;
|
||||
|
||||
// Compute the limiter
|
||||
const float A_1 = sign(c_1)*WAF_minbee(rh_1, c_1);
|
||||
const float A_2 = sign(c_2)*WAF_minbee(rv_2, c_2);
|
||||
const float A_3 = sign(c_3)*WAF_minbee(rh_3, c_3);
|
||||
|
||||
//Average the fluxes
|
||||
const float3 flux = 0.5f*( F_1 + F_4 )
|
||||
- 0.5f*( A_1 * (F_2 - F_1)
|
||||
+ A_2 * (F_3 - F_2)
|
||||
+ A_3 * (F_4 - F_3) );
|
||||
|
||||
/*
|
||||
const float d_0 = -1.0f;
|
||||
const float d_1 = -0.5f;//max(min(sign(c_1)*WAF_minbee(rh_1, c_1), 1.0f), -1.0f);
|
||||
const float d_2 = 0.0f;//max(min(sign(c_2)*WAF_minbee(rh_2, c_2), 1.0f), -1.0f);
|
||||
const float d_3 = 0.5f;//max(min(sign(c_3)*WAF_minbee(rh_3, c_3), 1.0f), -1.0f);
|
||||
const float d_4 = 1.0f;
|
||||
const float3 flux = 0.5f*(d_1 - d_0) * F_1
|
||||
+ 0.5f*(d_2 - d_1) * F_2
|
||||
+ 0.5f*(d_3 - d_2) * F_3
|
||||
+ 0.5f*(d_4 - d_3) * F_4;
|
||||
*/
|
||||
/*
|
||||
const float3 F_hllc = (S_r*F_1 - S_l*F_4 + S_r*S_l*(Q_r1 - Q_l1)) / (S_r-S_l);
|
||||
const float3 flux = 0.5f*(d_1 - d_0) * F_1
|
||||
+ 0.5f*(d_3 - d_1) * F_hllc
|
||||
+ 0.5f*(d_4 - d_3) * F_4;
|
||||
*/
|
||||
/*
|
||||
const float c_0 = -1.0f;
|
||||
const float c_4 = 1.0f;
|
||||
const float3 flux = 0.5f*(c_1 - c_0) * F_1
|
||||
+ 0.5f*(c_2 - c_1) * F_2
|
||||
+ 0.5f*(c_3 - c_2) * F_3
|
||||
+ 0.5f*(c_4 - c_3) * F_4;
|
||||
*/
|
||||
//const float3 flux = 0.5f*( F_1 + F_4 ) - 0.5f*( sign(c_3) * A_3 * (F_4 - F_3) );
|
||||
return flux;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Lax-Friedrichs flux (Toro 2001, p 163)
|
||||
*/
|
||||
float3 LxF_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||
const float3 F_l = F_func(Q_l, g_);
|
||||
const float3 F_r = F_func(Q_r, g_);
|
||||
|
||||
return 0.5f*(F_l + F_r) + (Q_l - Q_r) * dx_ / (2.0f*dt_);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Lax-Friedrichs extended to 2D
|
||||
*/
|
||||
float3 LxF_2D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||
const float3 F_l = F_func(Q_l, g_);
|
||||
const float3 F_r = F_func(Q_r, g_);
|
||||
|
||||
//Note numerical diffusion for 2D here (0.25)
|
||||
return 0.5f*(F_l + F_r) + (Q_l - Q_r) * dx_ / (4.0f*dt_);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Richtmeyer / Two-step Lax-Wendroff flux (Toro 2001, p 164)
|
||||
*/
|
||||
float3 LxW2_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||
const float3 F_l = F_func(Q_l, g_);
|
||||
const float3 F_r = F_func(Q_r, g_);
|
||||
|
||||
const float3 Q_lw2 = 0.5f*(Q_l + Q_r) + (F_l - F_r)*dt_/(2.0f*dx_);
|
||||
|
||||
return F_func(Q_lw2, g_);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Godunovs centered scheme (Toro 2001, p 165)
|
||||
*/
|
||||
float3 GodC_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||
const float3 F_l = F_func(Q_l, g_);
|
||||
const float3 F_r = F_func(Q_r, g_);
|
||||
|
||||
const float3 Q_godc = 0.5f*(Q_l + Q_r) + (F_l - F_r)*dt_/dx_;
|
||||
|
||||
return F_func(Q_godc, g_);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* First Ordered Centered (Toro 2001, p.163)
|
||||
*/
|
||||
float3 FORCE_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||
const float3 F_lf = LxF_1D_flux(Q_l, Q_r, g_, dx_, dt_);
|
||||
const float3 F_lw2 = LxW2_1D_flux(Q_l, Q_r, g_, dx_, dt_);
|
||||
return 0.5f*(F_lf + F_lw2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user