mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
commit
ed48305953
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
494
OpenCL to CUDA.ipynb
Normal file
494
OpenCL to CUDA.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -1,765 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"```\n",
|
|
||||||
"This notebook sets up and runs a set of benchmarks to compare\n",
|
|
||||||
"different numerical discretizations of the SWEs\n",
|
|
||||||
"\n",
|
|
||||||
"Copyright (C) 2016 SINTEF ICT\n",
|
|
||||||
"\n",
|
|
||||||
"This program is free software: you can redistribute it and/or modify\n",
|
|
||||||
"it under the terms of the GNU General Public License as published by\n",
|
|
||||||
"the Free Software Foundation, either version 3 of the License, or\n",
|
|
||||||
"(at your option) any later version.\n",
|
|
||||||
"\n",
|
|
||||||
"This program is distributed in the hope that it will be useful,\n",
|
|
||||||
"but WITHOUT ANY WARRANTY; without even the implied warranty of\n",
|
|
||||||
"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n",
|
|
||||||
"GNU General Public License for more details.\n",
|
|
||||||
"\n",
|
|
||||||
"You should have received a copy of the GNU General Public License\n",
|
|
||||||
"along with this program. If not, see <http://www.gnu.org/licenses/>.\n",
|
|
||||||
"```"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Import modules and set up environment"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Lets have matplotlib \"inline\"\n",
|
|
||||||
"%matplotlib inline\n",
|
|
||||||
"%config InlineBackend.figure_format = 'retina'\n",
|
|
||||||
"\n",
|
|
||||||
"#Import packages we need\n",
|
|
||||||
"import numpy as np\n",
|
|
||||||
"from matplotlib import animation, rc\n",
|
|
||||||
"from matplotlib import pyplot as plt\n",
|
|
||||||
"\n",
|
|
||||||
"import os\n",
|
|
||||||
"import pyopencl\n",
|
|
||||||
"import datetime\n",
|
|
||||||
"import sys\n",
|
|
||||||
"\n",
|
|
||||||
"#Set large figure sizes\n",
|
|
||||||
"rc('figure', figsize=(6.0, 4.0))\n",
|
|
||||||
"rc('animation', html='html5')\n",
|
|
||||||
"\n",
|
|
||||||
"#Import our simulator\n",
|
|
||||||
"from SWESimulators import FBL, CTCS,KP07, CDKLM16, PlotHelper, Common\n",
|
|
||||||
"#Import initial condition and bathymetry generating functions:\n",
|
|
||||||
"from SWESimulators.BathymetryAndICs import *"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Make sure we get compiler output from OpenCL\n",
|
|
||||||
"os.environ[\"PYOPENCL_COMPILER_OUTPUT\"] = \"1\"\n",
|
|
||||||
"\n",
|
|
||||||
"#Set which CL device to use, and disable kernel caching\n",
|
|
||||||
"if (str.lower(sys.platform).startswith(\"linux\")):\n",
|
|
||||||
" os.environ[\"PYOPENCL_CTX\"] = \"0\"\n",
|
|
||||||
"else:\n",
|
|
||||||
" os.environ[\"PYOPENCL_CTX\"] = \"1\"\n",
|
|
||||||
"os.environ[\"CUDA_CACHE_DISABLE\"] = \"1\"\n",
|
|
||||||
"os.environ[\"PYOPENCL_COMPILER_OUTPUT\"] = \"1\"\n",
|
|
||||||
"os.environ[\"PYOPENCL_NO_CACHE\"] = \"1\"\n",
|
|
||||||
"\n",
|
|
||||||
"#Create OpenCL context\n",
|
|
||||||
"cl_ctx = pyopencl.create_some_context()\n",
|
|
||||||
"print \"Using \", cl_ctx.devices[0].name"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Create output directory for images\n",
|
|
||||||
"imgdir='images_convergence_' + datetime.datetime.now().strftime(\"%Y_%m_%d-%H_%M_%S\")\n",
|
|
||||||
"os.makedirs(imgdir)\n",
|
|
||||||
"print \"Saving images to \" + imgdir"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def setBwStyles(ax):\n",
|
|
||||||
" from cycler import cycler\n",
|
|
||||||
"\n",
|
|
||||||
" ax.set_prop_cycle( cycler('marker', ['.', 'x', 4, '+', '*', '1']) +\n",
|
|
||||||
" cycler('linestyle', ['-.', '--', ':', '-.', '--', ':']) +\n",
|
|
||||||
" #cycler('markersize', [15, 15, 15, 15, 15, 15]) +\n",
|
|
||||||
" cycler('color', ['k', 'k', 'k', 'k', 'k', 'k']) )"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def rebin(a, *args):\n",
|
|
||||||
" '''rebin ndarray data into a smaller ndarray of the same rank whose dimensions\n",
|
|
||||||
" are factors of the original dimensions. eg. An array with 6 columns and 4 rows\n",
|
|
||||||
" can be reduced to have 6,3,2 or 1 columns and 4,2 or 1 rows.\n",
|
|
||||||
" example usages:\n",
|
|
||||||
" >>> a=rand(6,4); b=rebin(a,3,2)\n",
|
|
||||||
" >>> a=rand(6); b=rebin(a,2)\n",
|
|
||||||
" '''\n",
|
|
||||||
" shape = a.shape\n",
|
|
||||||
" lenShape = len(shape)\n",
|
|
||||||
" factor = np.asarray(shape)/np.asarray(args)\n",
|
|
||||||
" evList = ['a.reshape('] + \\\n",
|
|
||||||
" ['args[%d],factor[%d],'%(i,i) for i in range(lenShape)] + \\\n",
|
|
||||||
" [')'] + ['.sum(%d)'%(i+1) for i in range(lenShape)] + \\\n",
|
|
||||||
" ['/factor[%d]'%i for i in range(lenShape)]\n",
|
|
||||||
" #print ''.join(evList)\n",
|
|
||||||
" return eval(''.join(evList))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Global parameters"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"width = 512000\n",
|
|
||||||
"height = 512000\n",
|
|
||||||
"\n",
|
|
||||||
"domain_sizes = [16, 32, 64, 128, 256]#, 512, 1024, 2048, 4096]\n",
|
|
||||||
"reference_domain_size = 4 * max(domain_sizes)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"#schemes = [\"FBL\"] \n",
|
|
||||||
"schemes = [\"FBL\", \"CTCS\", \"KP\", \"CDKLM\"]\n",
|
|
||||||
"\n",
|
|
||||||
"#Timestep size \n",
|
|
||||||
"dt = 8000/reference_domain_size\n",
|
|
||||||
" \n",
|
|
||||||
"g = 9.81\n",
|
|
||||||
"r = 0.0\n",
|
|
||||||
"\n",
|
|
||||||
"# Coriolis parameters: f + beta * y\n",
|
|
||||||
"f = 8.0e-5\n",
|
|
||||||
"\n",
|
|
||||||
"timesteps = 5\n",
|
|
||||||
"\n",
|
|
||||||
"end_time = (timesteps - 0.01)*dt\n",
|
|
||||||
"make_netCDF = False\n",
|
|
||||||
"\n",
|
|
||||||
"print(\"Timesteps = \" + str(end_time / dt))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def initDataBump(h0, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, \\\n",
|
|
||||||
" g, f):\n",
|
|
||||||
" \n",
|
|
||||||
" waterHeight = 50\n",
|
|
||||||
" \n",
|
|
||||||
" def my_exp(i, j):\n",
|
|
||||||
" size = 0.3\n",
|
|
||||||
" x = (i + 0.5 - reference_domain_size/2.0) / float(reference_domain_size)\n",
|
|
||||||
" y = (j + 0.5 - reference_domain_size/2.0) / float(reference_domain_size)\n",
|
|
||||||
" return np.exp(-10*(x*x/(size*size)+y*y/(size*size))) * (np.sqrt(x**2 + y**2) < size)\n",
|
|
||||||
" \n",
|
|
||||||
" def my_cos(i, j):\n",
|
|
||||||
" size = 0.6\n",
|
|
||||||
" x = 2*(i + 0.5 - reference_domain_size/2.0) / float(reference_domain_size)\n",
|
|
||||||
" y = 2*(j + 0.5 - reference_domain_size/2.0) / float(reference_domain_size)\n",
|
|
||||||
" r = np.sqrt(x**2 + y**2)\n",
|
|
||||||
" return 0.5*(1.0 + np.cos(np.pi*r/size)) * (r < size)\n",
|
|
||||||
" \n",
|
|
||||||
" #Generate disturbance at reference scale and downsample \n",
|
|
||||||
" disturbance = np.fromfunction(lambda i, j: my_cos(i,j), (reference_domain_size, reference_domain_size)) \n",
|
|
||||||
" disturbance = rebin(disturbance, nx, ny)\n",
|
|
||||||
" \n",
|
|
||||||
" validCells = [ghosts[2], eta0.shape[0] - ghosts[0], ghosts[3], eta0.shape[1] - ghosts[1]]\n",
|
|
||||||
" \n",
|
|
||||||
" eta0.fill(0.0)\n",
|
|
||||||
" eta0[validCells[0]:validCells[1], validCells[2]:validCells[3]] += (0.01*disturbance)\n",
|
|
||||||
" h0.fill(waterHeight)\n",
|
|
||||||
" u0.fill(0.0)\n",
|
|
||||||
" v0.fill(0.0)\n",
|
|
||||||
"\n",
|
|
||||||
"def initDataBalancedBump(h0, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, \\\n",
|
|
||||||
" g, f):\n",
|
|
||||||
" bump_posx = 0.5\n",
|
|
||||||
" bump_posy = 0.5\n",
|
|
||||||
" bump_height = 0.25\n",
|
|
||||||
" bump_width_factor = 20*nx\n",
|
|
||||||
" waterHeight = 50 \n",
|
|
||||||
" initializeBalancedBumpOverPoint(eta0, u0, v0, # allocated buffers to be filled with data (output)\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, # grid data\n",
|
|
||||||
" bump_posx, bump_posy, # relative placement of bump center\n",
|
|
||||||
" bump_height, bump_width_factor, # bump information\n",
|
|
||||||
" f, waterHeight, # parameters defined at the bump centre (coriolis force, water depth)\n",
|
|
||||||
" g)\n",
|
|
||||||
" \n",
|
|
||||||
" # Scale eta to be out of geostrophic balance\n",
|
|
||||||
" eta0 *= 1.1\n",
|
|
||||||
" h0.fill(waterHeight);\n",
|
|
||||||
" \n",
|
|
||||||
"def initData(h0, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, \\\n",
|
|
||||||
" g, f):\n",
|
|
||||||
" initDataBump(h0, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, \\\n",
|
|
||||||
" g, f)\n",
|
|
||||||
" \n",
|
|
||||||
"def testInitData(domain_size):\n",
|
|
||||||
" \n",
|
|
||||||
" nx = domain_size\n",
|
|
||||||
" ny = domain_size\n",
|
|
||||||
" \n",
|
|
||||||
" dx = float(width/nx)\n",
|
|
||||||
" dy = float(height/ny)\n",
|
|
||||||
" \n",
|
|
||||||
" ghosts = [1, 1, 1, 1] # north, east, south, west\n",
|
|
||||||
" dataShape = (ny + ghosts[0]+ghosts[2], \n",
|
|
||||||
" nx + ghosts[1]+ghosts[3])\n",
|
|
||||||
"\n",
|
|
||||||
" h0 = np.zeros(dataShape, dtype=np.float32);\n",
|
|
||||||
" eta0 = np.zeros(dataShape, dtype=np.float32);\n",
|
|
||||||
" u0 = np.zeros((dataShape[0], dataShape[1]+1), dtype=np.float32);\n",
|
|
||||||
" v0 = np.zeros((dataShape[0]+1, dataShape[1]), dtype=np.float32);\n",
|
|
||||||
" \n",
|
|
||||||
" initData(h0, eta0, u0, v0, nx, ny, dx, dy, ghosts, g, f)\n",
|
|
||||||
" \n",
|
|
||||||
" return eta0\n",
|
|
||||||
" \n",
|
|
||||||
"plt.figure()\n",
|
|
||||||
"for i, domain_size in enumerate(domain_sizes):\n",
|
|
||||||
" eta0 = testInitData(domain_size)\n",
|
|
||||||
" plt.subplot(1, len(domain_sizes)+1, i+1)\n",
|
|
||||||
" plt.imshow(eta0, interpolation='nearest')\n",
|
|
||||||
" print(\"Max={:.05f}, min={:.05f}, sum={:.010f}\".format(np.max(eta0), np.min(eta0), np.sum(eta0/(domain_size*domain_size))))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def plotData(eta0, u0, v0, eta1, u1, v1):\n",
|
|
||||||
" fig, axarr = plt.subplots(2, 3)\n",
|
|
||||||
" axarr[0, 0].imshow(eta0, interpolation=\"nearest\")\n",
|
|
||||||
" axarr[0, 1].imshow(u0, interpolation=\"nearest\")\n",
|
|
||||||
" axarr[0, 2].imshow(v0, interpolation=\"nearest\")\n",
|
|
||||||
" axarr[1, 0].imshow(eta1, interpolation=\"nearest\")\n",
|
|
||||||
" axarr[1, 1].imshow(u1, interpolation=\"nearest\")\n",
|
|
||||||
" axarr[1, 2].imshow(v1, interpolation=\"nearest\")\n",
|
|
||||||
" print(\"Eta0: Maximum = {:.05f}, minimum = {:.05f}\".format(np.max(eta0), np.min(eta0)))\n",
|
|
||||||
" print(\"Eta1: Maximum = {:.05f}, minimum = {:.05f}\".format(np.max(eta1), np.min(eta1)))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Forward Backward Linear"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def runFBL(domain_size):\n",
|
|
||||||
" #Clean up old simulator if any:\n",
|
|
||||||
" if 'fbl_sim' in globals():\n",
|
|
||||||
" fbl_sim.cleanUp()\n",
|
|
||||||
" \n",
|
|
||||||
" nx = domain_size\n",
|
|
||||||
" ny = domain_size\n",
|
|
||||||
" \n",
|
|
||||||
" dx = float(width/nx)\n",
|
|
||||||
" dy = float(height/ny)\n",
|
|
||||||
" \n",
|
|
||||||
" ghosts = [0, 0, 0, 0] # north, east, south, west\n",
|
|
||||||
" dataShape = (ny + ghosts[0]+ghosts[2], \n",
|
|
||||||
" nx + ghosts[1]+ghosts[3])\n",
|
|
||||||
"\n",
|
|
||||||
" h0 = np.zeros(dataShape, dtype=np.float32);\n",
|
|
||||||
" eta0 = np.zeros(dataShape, dtype=np.float32);\n",
|
|
||||||
" u0 = np.zeros((dataShape[0], dataShape[1]+1), dtype=np.float32);\n",
|
|
||||||
" v0 = np.zeros((dataShape[0]+1, dataShape[1]), dtype=np.float32);\n",
|
|
||||||
"\n",
|
|
||||||
" # Generate initial conditions (initData currently forwards to initDataBump)\n",
|
|
||||||
" initData(h0, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, \\\n",
|
|
||||||
" g, f)\n",
|
|
||||||
"\n",
|
|
||||||
" #Initialize simulator\n",
|
|
||||||
" reload(FBL)\n",
|
|
||||||
" fbl_sim = FBL.FBL(cl_ctx, \\\n",
|
|
||||||
" h0, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, \\\n",
|
|
||||||
" dx, dy, dt, \\\n",
|
|
||||||
" g, f, r, \\\n",
|
|
||||||
" write_netcdf=make_netCDF)\n",
|
|
||||||
"\n",
|
|
||||||
" t = fbl_sim.step(end_time)\n",
|
|
||||||
" eta1, u1, v1 = fbl_sim.download()\n",
|
|
||||||
" print \"\\t\\tt=\" + str(t) + \"\\tMax eta: \" + str(np.max(eta1))\n",
|
|
||||||
" \n",
|
|
||||||
" return [eta0, u0, v0, eta1, u1, v1]\n",
|
|
||||||
"\n",
|
|
||||||
"[eta0, u0, v0, eta1, u1, v1] = runFBL(16)\n",
|
|
||||||
"plotData(eta0, u0, v0, eta1, u1, v1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if make_netCDF:\n",
|
|
||||||
" fbl_sim.cleanUp()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Centered in time, centered in space"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"scrolled": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"#Centered in time, centered in space\n",
|
|
||||||
"\n",
|
|
||||||
"def runCTCS(domain_size):\n",
|
|
||||||
" #Clean up old simulator if any:\n",
|
|
||||||
" if 'ctcs_sim' in globals():\n",
|
|
||||||
" ctcs_sim.cleanUp()\n",
|
|
||||||
" \n",
|
|
||||||
" nx = domain_size\n",
|
|
||||||
" ny = domain_size\n",
|
|
||||||
" \n",
|
|
||||||
" dx = float(width/nx)\n",
|
|
||||||
" dy = float(height/ny)\n",
|
|
||||||
" \n",
|
|
||||||
" ghosts = [1,1,1,1] # north, east, south, west\n",
|
|
||||||
" validDomain = np.array([1,1,1,1])\n",
|
|
||||||
" dataShape = (ny + ghosts[0]+ghosts[2], \n",
|
|
||||||
" nx + ghosts[1]+ghosts[3])\n",
|
|
||||||
"\n",
|
|
||||||
" h0 = np.zeros(dataShape, dtype=np.float32);\n",
|
|
||||||
" eta0 = np.zeros(dataShape, dtype=np.float32);\n",
|
|
||||||
" u0 = np.zeros((dataShape[0], dataShape[1]+1), dtype=np.float32);\n",
|
|
||||||
" v0 = np.zeros((dataShape[0]+1, dataShape[1]), dtype=np.float32); \n",
|
|
||||||
"\n",
|
|
||||||
" initData(h0, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, \\\n",
|
|
||||||
" g, f)\n",
|
|
||||||
" \n",
|
|
||||||
" # Eddy viscosity parameter\n",
|
|
||||||
" A = 0.5*dx\n",
|
|
||||||
" \n",
|
|
||||||
" reload(CTCS)\n",
|
|
||||||
" ctcs_sim = CTCS.CTCS(cl_ctx, \\\n",
|
|
||||||
" h0, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, dt, \\\n",
|
|
||||||
" g, f, r, A, \\\n",
|
|
||||||
" write_netcdf=make_netCDF)\n",
|
|
||||||
"\n",
|
|
||||||
" t = ctcs_sim.step(end_time)\n",
|
|
||||||
" eta1, u1, v1 = ctcs_sim.download()\n",
|
|
||||||
" \n",
|
|
||||||
" # Remove ghost cells\n",
|
|
||||||
" eta1 = eta1[validDomain[3]:-validDomain[1], validDomain[2]:-validDomain[0]]\n",
|
|
||||||
" \n",
|
|
||||||
" print \"\\t\\tt=\" + str(t) + \"\\tMax eta: \" + str(np.max(eta1))\n",
|
|
||||||
" \n",
|
|
||||||
" return [eta0, u0, v0, eta1, u1, v1]\n",
|
|
||||||
"\n",
|
|
||||||
"[eta0, u0, v0, eta1, u1, v1] = runCTCS(16)\n",
|
|
||||||
"plotData(eta0, u0, v0, eta1, u1, v1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if make_netCDF:\n",
|
|
||||||
" ctcs_sim.cleanUp()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## CDKLM 16"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"scrolled": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def runCDKLM(domain_size):\n",
|
|
||||||
" #Clean up old simulator if any:\n",
|
|
||||||
" if 'cdklm_sim' in globals():\n",
|
|
||||||
" cdklm_sim.cleanUp()\n",
|
|
||||||
"\n",
|
|
||||||
" #Coriolis well balanced reconstruction scheme\n",
|
|
||||||
" \n",
|
|
||||||
" nx = domain_size\n",
|
|
||||||
" ny = domain_size\n",
|
|
||||||
" \n",
|
|
||||||
" dx = float(width/nx)\n",
|
|
||||||
" dy = float(height/ny)\n",
|
|
||||||
"\n",
|
|
||||||
" ghosts = np.array([2,2,2,2]) # north, east, south, west\n",
|
|
||||||
" validDomain = np.array([2,2,2,2])\n",
|
|
||||||
" dataShape = (ny + ghosts[0]+ghosts[2], \n",
|
|
||||||
" nx + ghosts[1]+ghosts[3])\n",
|
|
||||||
"\n",
|
|
||||||
" Hi = np.zeros((dataShape[0]+1, dataShape[1]+1), dtype=np.float32)\n",
|
|
||||||
" eta0 = np.zeros(dataShape, dtype=np.float32)\n",
|
|
||||||
" u0 = np.zeros(dataShape, dtype=np.float32)\n",
|
|
||||||
" v0 = np.zeros(dataShape, dtype=np.float32)\n",
|
|
||||||
"\n",
|
|
||||||
" initData(Hi, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, \\\n",
|
|
||||||
" g, f)\n",
|
|
||||||
"\n",
|
|
||||||
" #Initialize simulator\n",
|
|
||||||
" reload(CDKLM16)\n",
|
|
||||||
" cdklm_sim = CDKLM16.CDKLM16(cl_ctx, \\\n",
|
|
||||||
" eta0, u0, v0, Hi, \\\n",
|
|
||||||
" nx, ny, dx, dy, dt, \\\n",
|
|
||||||
" g, f, r, \\\n",
|
|
||||||
" rk_order=2, \n",
|
|
||||||
" write_netcdf=make_netCDF)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
" t = cdklm_sim.step(end_time)\n",
|
|
||||||
" eta1, u1, v1 = cdklm_sim.download()\n",
|
|
||||||
" \n",
|
|
||||||
" # Remove ghost cells\n",
|
|
||||||
" eta1 = eta1[validDomain[3]:-validDomain[1], validDomain[2]:-validDomain[0]]\n",
|
|
||||||
" \n",
|
|
||||||
" print \"\\t\\tt=\" + str(t) + \"\\tMax eta: \" + str(np.max(eta1))\n",
|
|
||||||
" \n",
|
|
||||||
" return [eta0, u0, v0, eta1, u1, v1]\n",
|
|
||||||
"\n",
|
|
||||||
"[eta0, u0, v0, eta1, u1, v1] = runCDKLM(16)\n",
|
|
||||||
"plotData(eta0, u0, v0, eta1, u1, v1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if make_netCDF:\n",
|
|
||||||
" cdklm_sim.cleanUp()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Kurganov-Petrova 2007"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"scrolled": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def runKP(domain_size):\n",
|
|
||||||
" #Clean up old simulator if any:\n",
|
|
||||||
" if 'kp07_sim' in globals():\n",
|
|
||||||
" kp07_sim.cleanUp()\n",
|
|
||||||
" \n",
|
|
||||||
" # Kurganov-Petrova 2007\n",
|
|
||||||
" \n",
|
|
||||||
" nx = domain_size\n",
|
|
||||||
" ny = domain_size\n",
|
|
||||||
" \n",
|
|
||||||
" dx = float(width/nx)\n",
|
|
||||||
" dy = float(height/ny)\n",
|
|
||||||
" \n",
|
|
||||||
" ghosts = np.array([2,2,2,2]) # north, east, south, west\n",
|
|
||||||
" validDomain = np.array([2,2,2,2])\n",
|
|
||||||
" dataShape = (ny + ghosts[0]+ghosts[2], \n",
|
|
||||||
" nx + ghosts[1]+ghosts[3])\n",
|
|
||||||
"\n",
|
|
||||||
" Hi = np.zeros((dataShape[0]+1, dataShape[1]+1), dtype=np.float32)\n",
|
|
||||||
" eta0 = np.zeros(dataShape, dtype=np.float32)\n",
|
|
||||||
" u0 = np.zeros(dataShape, dtype=np.float32)\n",
|
|
||||||
" v0 = np.zeros(dataShape, dtype=np.float32)\n",
|
|
||||||
"\n",
|
|
||||||
" initData(Hi, eta0, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, ghosts, \\\n",
|
|
||||||
" g, f)\n",
|
|
||||||
"\n",
|
|
||||||
" #Initialize simulator\n",
|
|
||||||
" reload(KP07)\n",
|
|
||||||
" kp07_sim = KP07.KP07(cl_ctx, \\\n",
|
|
||||||
" eta0, Hi, u0, v0, \\\n",
|
|
||||||
" nx, ny, dx, dy, dt, \\\n",
|
|
||||||
" g, f, r, \\\n",
|
|
||||||
" write_netcdf=make_netCDF,\\\n",
|
|
||||||
" use_rk2=True)\n",
|
|
||||||
"\n",
|
|
||||||
" t = kp07_sim.step(end_time)\n",
|
|
||||||
" eta1, u1, v1 = kp07_sim.download()\n",
|
|
||||||
" \n",
|
|
||||||
" # Remove ghost cells\n",
|
|
||||||
" eta1 = eta1[validDomain[3]:-validDomain[1], validDomain[2]:-validDomain[0]]\n",
|
|
||||||
" \n",
|
|
||||||
" print \"\\t\\tt=\" + str(t) + \"\\tMax eta: \" + str(np.max(eta1))\n",
|
|
||||||
" \n",
|
|
||||||
" return [eta0, u0, v0, eta1, u1, v1]\n",
|
|
||||||
"\n",
|
|
||||||
"[eta0, u0, v0, eta1, u1, v1] = runKP(16)\n",
|
|
||||||
"plotData(eta0, u0, v0, eta1, u1, v1)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": true
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"if make_netCDF:\n",
|
|
||||||
" kp07_sim.cleanUp()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Control "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false,
|
|
||||||
"scrolled": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"for scheme in schemes:\n",
|
|
||||||
" print \"Scheme: \" + scheme\n",
|
|
||||||
" \n",
|
|
||||||
" data = {};\n",
|
|
||||||
" \n",
|
|
||||||
" # Make reference solution\n",
|
|
||||||
" print \"\\tDomain size (reference solution): \" + str(reference_domain_size)\n",
|
|
||||||
" [_, _, _, eta1_ref, _, _] = eval(\"run\" + scheme + \"(\" + str(reference_domain_size) + \")\")\n",
|
|
||||||
" \n",
|
|
||||||
" data[str(reference_domain_size)] = eta1_ref\n",
|
|
||||||
"\n",
|
|
||||||
" # Run all domain sizes\n",
|
|
||||||
" for domain_size in domain_sizes:\n",
|
|
||||||
" print \"\\tDomain size: \" + str(domain_size)\n",
|
|
||||||
" [_, _, _, eta1, _, _] = eval(\"run\" + scheme + \"(\" + str(domain_size) + \")\")\n",
|
|
||||||
" \n",
|
|
||||||
" data[str(domain_size)] = eta1\n",
|
|
||||||
" \n",
|
|
||||||
" \n",
|
|
||||||
" out_filename = imgdir + \"/\" + scheme + \"_data.npz\"\n",
|
|
||||||
" np.savez(out_filename, **data)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"error = np.zeros([len(schemes), len(domain_sizes)])\n",
|
|
||||||
"\n",
|
|
||||||
"for k, scheme in enumerate(schemes):\n",
|
|
||||||
" print \"Scheme: \" + scheme\n",
|
|
||||||
" \n",
|
|
||||||
" in_filename = imgdir + \"/\" + scheme + \"_data.npz\"\n",
|
|
||||||
" npzfile = np.load(in_filename)\n",
|
|
||||||
" \n",
|
|
||||||
" #Get reference\n",
|
|
||||||
" eta1_ref = npzfile[str(reference_domain_size)].astype(np.float64)\n",
|
|
||||||
" \n",
|
|
||||||
" # Run all domain sizes\n",
|
|
||||||
" for l, domain_size in enumerate(domain_sizes):\n",
|
|
||||||
" eta1 = npzfile[str(domain_size)].astype(np.float64)\n",
|
|
||||||
" \n",
|
|
||||||
" print(\"Max={:.05f}, min={:.05f}, sum={:.010f}\".format(np.max(eta1), np.min(eta1), np.sum(eta1/(domain_size*domain_size))))\n",
|
|
||||||
"\n",
|
|
||||||
" #ver 1 : downsample til minste oppl\u00f8sning\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" eta1_ref_downsampled = rebin(eta1_ref, min(domain_sizes), min(domain_sizes))\n",
|
|
||||||
" eta1_downsampled = rebin(eta1, min(domain_sizes), min(domain_sizes))\n",
|
|
||||||
" tmp =eta1_ref_downsampled - eta1_downsampled\n",
|
|
||||||
" error[k, l] = np.linalg.norm(tmp.flatten(), ord=2)\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" #\"\"\"\n",
|
|
||||||
" #ver 2: downsample til current oppl\u00f8sning\n",
|
|
||||||
" eta1_ref_downsampled = rebin(eta1_ref, domain_size, domain_size)\n",
|
|
||||||
" eta1_downsampled = eta1\n",
|
|
||||||
" tmp =eta1_ref_downsampled - eta1_downsampled\n",
|
|
||||||
" error[k, l] = np.linalg.norm(tmp, ord='fro') / (domain_size*domain_size)\n",
|
|
||||||
" #\"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" #ver 3: upsample til refereanseoppl\u00f8sning\n",
|
|
||||||
" eta1_ref_downsampled = eta1_ref\n",
|
|
||||||
" upsampling = np.ones(np.divide(eta1_ref.shape, eta1.shape))\n",
|
|
||||||
" eta1_downsampled = np.kron(eta1, upsampling)\n",
|
|
||||||
" tmp =eta1_ref_downsampled - eta1_downsampled\n",
|
|
||||||
" error[k, l] = np.linalg.norm(tmp.flatten(), ord=2)\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" \n",
|
|
||||||
" \n",
|
|
||||||
"fig = plt.figure()\n",
|
|
||||||
"setBwStyles(fig.gca())\n",
|
|
||||||
"\n",
|
|
||||||
"x = np.linspace(domain_sizes[0], domain_sizes[-1], 100);\n",
|
|
||||||
"\n",
|
|
||||||
"#scaling = np.min(error[:,0]) * domain_sizes[0]**0.5 * 0.5\n",
|
|
||||||
"#plt.loglog(x, scaling/(np.sqrt(x)), '-', color='gray', label='Order 0.5')\n",
|
|
||||||
"\n",
|
|
||||||
"scaling = np.max(error[:,0]) * domain_sizes[0] * 2\n",
|
|
||||||
"plt.loglog(x, scaling/x, '-', color='gray', label='Order 1')\n",
|
|
||||||
"\n",
|
|
||||||
"scaling = np.min(error[:,0]) * domain_sizes[0]**2 * 0.5\n",
|
|
||||||
"plt.loglog(x, scaling/(x*x), '-', color='gray', label='Order 2')\n",
|
|
||||||
"\n",
|
|
||||||
"for k in range(len(schemes)):\n",
|
|
||||||
" print \"Scheme \" + str(schemes[k])\n",
|
|
||||||
" for l in range(len(domain_sizes)):\n",
|
|
||||||
" print \"\\tDomain size: \" + str(domain_sizes[l]) + \": \" + str(error[k,l])\n",
|
|
||||||
" plt.loglog(domain_sizes, error[k,:], label=schemes[k], markersize=15)\n",
|
|
||||||
"#plt.loglog(domain_sizes, np.abs(error[0,:]-error[1,:]), label=\"Diff\", markersize=15)\n",
|
|
||||||
" \n",
|
|
||||||
"plt.xlabel('Number of cells')\n",
|
|
||||||
"plt.ylabel('Error')\n",
|
|
||||||
"plt.legend(markerscale=0.5)\n",
|
|
||||||
"\n",
|
|
||||||
"plt.savefig(imgdir + \"/\" + \"convergence.pdf\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {
|
|
||||||
"collapsed": false
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"git": {
|
|
||||||
"suppress_outputs": true
|
|
||||||
},
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 2",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python2"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 2
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython2",
|
|
||||||
"version": "2.7.6"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 1
|
|
||||||
}
|
|
@ -1,205 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
This python module implements
|
|
||||||
Alina Chertock, Michael Dudzinski, A. Kurganov & Maria Lukacova-Medvidova (2016)
|
|
||||||
Well-Balanced Schemes for the Shallow Water Equations with Coriolis Forces
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
#Import packages we need
|
|
||||||
import numpy as np
|
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Class that solves the SW equations using the well-balanced scheme of Chertock, Dudzinski, Kurganov & Lukacova-Medvidova (2016)
|
|
||||||
"""
|
|
||||||
class CDKLM16:
|
|
||||||
|
|
||||||
"""
|
|
||||||
Initialization routine
|
|
||||||
h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
|
|
||||||
u0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
|
|
||||||
v0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
|
|
||||||
nx: Number of cells along x-axis
|
|
||||||
ny: Number of cells along y-axis
|
|
||||||
dx: Grid cell spacing along x-axis (20 000 m)
|
|
||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
|
||||||
dt: Size of each timestep (90 s)
|
|
||||||
g: Gravitational acceleration (9.81 m/s^2)
|
|
||||||
f: Coriolis parameter (1.2e-4 s^-1)
|
|
||||||
r: Bottom friction coefficient (2.4e-3 m/s)
|
|
||||||
"""
|
|
||||||
def __init__(self, \
             cl_ctx, \
             h0, hu0, hv0, \
             nx, ny, \
             dx, dy, dt, \
             g, f, r, \
             theta=1.3, use_rk2=True,
             wind_stress=None, \
             block_width=16, block_height=16):
    """
    Create the command queue, load the kernel, upload the initial state
    and compute the kernel launch configuration.

    cl_ctx: OpenCL context used for the queue, the kernel and the buffers
    h0, hu0, hv0: Initial state, handed to Common.SWEDataArkawaA together
        with a ghost cell count of 3 in both x and y
    nx, ny: Number of cells along the x- and y-axis (stored as int32)
    dx, dy, dt: Grid spacing and timestep size (stored as float32)
    g, f, r: Gravity, Coriolis and friction parameters (stored as float32)
    theta: Stored as float32 and passed on to the kernel; presumably the
        slope limiter parameter - TODO confirm against the kernel source
    use_rk2: Flag that step() checks before launching the kernel
    wind_stress: Wind stress parameters; when None (the default) a new
        Common.WindStressParams() is created for this instance
    block_width, block_height: Work-group size used for the kernel build
        and for the launch configuration
    """
    #Build the default here rather than in the signature: a default
    #argument is evaluated only once, so all simulators would otherwise
    #share one and the same WindStressParams object
    if wind_stress is None:
        wind_stress = Common.WindStressParams()

    self.cl_ctx = cl_ctx

    #Create an OpenCL command queue
    self.cl_queue = cl.CommandQueue(self.cl_ctx)

    #Get kernels
    self.kernel = Common.get_kernel(self.cl_ctx, "CDKLM16_kernel.opencl", block_width, block_height)

    #Create data by uploading to device
    ghost_cells_x = 3
    ghost_cells_y = 3
    self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)

    #Save input parameters
    #Notice that we need to specify them in the correct dataformat for the
    #OpenCL kernel
    self.nx = np.int32(nx)
    self.ny = np.int32(ny)
    self.dx = np.float32(dx)
    self.dy = np.float32(dy)
    self.dt = np.float32(dt)
    self.g = np.float32(g)
    self.f = np.float32(f)
    self.r = np.float32(r)
    self.theta = np.float32(theta)
    self.use_rk2 = use_rk2
    self.wind_stress = wind_stress

    #Initialize time
    self.t = np.float32(0.0)

    #Compute kernel launch parameters
    #The global size is nx (ny) rounded up to a whole number of work-groups
    self.local_size = (block_width, block_height)
    self.global_size = (
        int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]),
        int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1])
    )
|
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
return "Chertok, Dudzinski, Kurganov, Lukacova-Medvidova"
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Function which steps n timesteps
|
|
||||||
"""
|
|
||||||
def step(self, t_end=0.0):
|
|
||||||
n = int(t_end / self.dt + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
|
||||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
|
||||||
|
|
||||||
if (local_dt <= 0.0):
|
|
||||||
break
|
|
||||||
|
|
||||||
if (self.use_rk2):
|
|
||||||
self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
|
||||||
self.nx, self.ny, \
|
|
||||||
self.dx, self.dy, local_dt, \
|
|
||||||
self.g, \
|
|
||||||
self.theta, \
|
|
||||||
self.f, \
|
|
||||||
self.r, \
|
|
||||||
np.int32(0), \
|
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch, \
|
|
||||||
self.wind_stress.type, \
|
|
||||||
self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc, \
|
|
||||||
self.wind_stress.x0, self.wind_stress.y0, \
|
|
||||||
self.wind_stress.u0, self.wind_stress.v0, \
|
|
||||||
self.t)
|
|
||||||
self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
|
||||||
self.nx, self.ny, \
|
|
||||||
self.dx, self.dy, local_dt, \
|
|
||||||
self.g, \
|
|
||||||
self.theta, \
|
|
||||||
self.f, \
|
|
||||||
self.r, \
|
|
||||||
np.int32(1), \
|
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch, \
|
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
|
||||||
self.wind_stress.type, \
|
|
||||||
self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc, \
|
|
||||||
self.wind_stress.x0, self.wind_stress.y0, \
|
|
||||||
self.wind_stress.u0, self.wind_stress.v0, \
|
|
||||||
self.t)
|
|
||||||
else:
|
|
||||||
self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
|
||||||
self.nx, self.ny, \
|
|
||||||
self.dx, self.dy, local_dt, \
|
|
||||||
self.g, \
|
|
||||||
self.theta, \
|
|
||||||
self.f, \
|
|
||||||
self.r, \
|
|
||||||
np.int32(0), \
|
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch, \
|
|
||||||
self.wind_stress.type, \
|
|
||||||
self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc, \
|
|
||||||
self.wind_stress.x0, self.wind_stress.y0, \
|
|
||||||
self.wind_stress.u0, self.wind_stress.v0, \
|
|
||||||
self.t)
|
|
||||||
self.cl_data.swap()
|
|
||||||
|
|
||||||
self.t += local_dt
|
|
||||||
|
|
||||||
|
|
||||||
return self.t
|
|
||||||
|
|
||||||
"""
|
|
||||||
Static function which reads a text file and creates an OpenCL kernel from that
|
|
||||||
"""
|
|
||||||
def get_kernel(self, kernel_filename):
|
|
||||||
#Read the proper program
|
|
||||||
module_path = os.path.dirname(os.path.realpath(__file__))
|
|
||||||
fullpath = os.path.join(module_path, kernel_filename)
|
|
||||||
with open(fullpath, "r") as kernel_file:
|
|
||||||
kernel_string = kernel_file.read()
|
|
||||||
kernel = cl.Program(self.cl_ctx, kernel_string).build()
|
|
||||||
|
|
||||||
return kernel
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download(self):
|
|
||||||
return self.cl_data.download(self.cl_queue)
|
|
||||||
|
|
@ -1,440 +0,0 @@
|
|||||||
/*
|
|
||||||
This OpenCL kernel implements the Kurganov-Petrova numerical scheme
|
|
||||||
for the shallow water equations, described in
|
|
||||||
A. Kurganov & Guergana Petrova
|
|
||||||
A Second-Order Well-Balanced Positivity Preserving Central-Upwind
|
|
||||||
Scheme for the Saint-Venant System Communications in Mathematical
|
|
||||||
Sciences, 5 (2007), 133-160.
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
  * Evaluates the flux function for the state Q = (h, u, v) and
  * gravitational acceleration g.
  * Returns (h*u, h*u*u + 0.5*g*h*h, h*u*v).
  */
float3 CDKLM16_F_func(const float3 Q, const float g) {
    const float h = Q.x;
    const float u = Q.y;
    const float v = Q.z;

    return (float3)(h*u,
                    h*u*u + 0.5f*g*h*h,
                    h*u*v);
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
  * Computes the numerical flux across a cell interface from the two
  * one-sided states Qm and Qp.
  *
  * Note that both states are given as (h, u, v), thus not the regular
  * (h, hu, hv).
  *
  * The first two components blend the one-sided fluxes using the one-sided
  * wave speed estimates; the third component is taken from the side
  * selected by the sign of (Qm.y + Qp.y).
  */
float3 CDKLM16_flux(const float3 Qm, float3 Qp, const float g) {
    // One-sided flux function values
    const float3 flux_m = CDKLM16_F_func(Qm, g);
    const float3 flux_p = CDKLM16_F_func(Qp, g);

    // sqrt(g*h) on either side
    const float c_m = sqrt(g*Qm.x);
    const float c_p = sqrt(g*Qp.x);

    // Wave speed estimates, clamped so that a_minus <= 0 <= a_plus
    const float a_minus = min(min(Qm.y-c_m, Qp.y-c_p), 0.0f); // largest negative wave speed
    const float a_plus  = max(max(Qm.y+c_m, Qp.y+c_p), 0.0f); // largest positive wave speed

    float3 result;

    result.x = ((a_plus*flux_m.x - a_minus*flux_p.x) + a_plus*a_minus*(Qp.x-Qm.x))/(a_plus-a_minus);
    result.y = ((a_plus*flux_m.y - a_minus*flux_p.y) + a_plus*a_minus*(Qp.y-Qm.y))/(a_plus-a_minus);

    //Upwinding to be consistent
    if (Qm.y + Qp.y > 0) {
        result.z = flux_m.z;
    }
    else {
        result.z = flux_p.z;
    }

    return result;
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
  * Computes one pass of the scheme; each work-item updates one interior cell.
  *
  * Every work-group loads its tile of (h, hu, hv) plus three extra cells on
  * each side into local memory, fills the ghost cells next to the domain
  * boundary by mirroring interior values (negating hu on the x boundaries and
  * hv on the y boundaries), builds the reconstruction variables (u, v, K, L),
  * computes slopes and interface fluxes along x and y, and finally writes the
  * updated state for all interior cells.
  *
  * step_ == 0: the updated state is written to the output buffers.
  * step_ == 1: the current content of the output buffers is read first, and
  *             the average of that state and the updated state is written.
  * Any other value of step_ writes nothing.
  *
  * The *_pitch_ arguments are row strides in bytes (rows are addressed through
  * a char pointer).
  *
  * NOTE(review): block_width and block_height are not defined in this file;
  * presumably they are compile-time constants supplied when the program is
  * built. minmodSlope, windStressX and windStressY are not defined here
  * either and presumably come from "common.opencl" -- TODO confirm.
  */
__kernel void swe_2D(
        int nx_, int ny_,
        float dx_, float dy_, float dt_,
        float g_,

        float theta_,

        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        int step_,

        //Input h^n
        __global float* h0_ptr_, int h0_pitch_,
        __global float* hu0_ptr_, int hu0_pitch_,
        __global float* hv0_ptr_, int hv0_pitch_,

        //Output h^{n+1}
        __global float* h1_ptr_, int h1_pitch_,
        __global float* hu1_ptr_, int hu1_pitch_,
        __global float* hv1_ptr_, int hv1_pitch_,

        //Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Index of block within domain
    const int bx = get_local_size(0) * get_group_id(0);
    const int by = get_local_size(1) * get_group_id(1);

    //Index of cell within domain
    const int ti = get_global_id(0) + 3; //Skip global ghost cells, i.e., +3
    const int tj = get_global_id(1) + 3;

    // Our physical variables
    __local float R[3][block_height+6][block_width+6];

    // Our reconstruction variables
    __local float Q[4][block_height+4][block_width+4];
    __local float Qx[4][block_height][block_width+2];
    __local float Qy[4][block_height+2][block_width];

    // Our fluxes
    __local float F[3][block_height][block_width+1];
    __local float G[3][block_height+1][block_width];



    //Read into local memory
    for (int j=ty; j<block_height+6; j+=get_local_size(1)) {
        const int l = clamp(by + j, 0, ny_+5); // Out of bounds

        //Compute the pointer to current row in the arrays
        __global float* const h_row = (__global float*) ((__global char*) h0_ptr_ + h0_pitch_*l);
        __global float* const hu_row = (__global float*) ((__global char*) hu0_ptr_ + hu0_pitch_*l);
        __global float* const hv_row = (__global float*) ((__global char*) hv0_ptr_ + hv0_pitch_*l);

        for (int i=tx; i<block_width+6; i+=get_local_size(0)) {
            const int k = clamp(bx + i, 0, nx_+5); // Out of bounds

            R[0][j][i] = h_row[k];
            R[1][j][i] = hu_row[k];
            R[2][j][i] = hv_row[k];
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);



    //Fix boundary conditions
    //The three ghost cells mirror the three nearest interior cells, with the
    //momentum component normal to the boundary negated
    {
        const int i = tx + 3; //Skip local ghost cells, i.e., +3
        const int j = ty + 3;

        if (ti == 3) {
            R[0][j][i-1] =  R[0][j][i];
            R[1][j][i-1] = -R[1][j][i];
            R[2][j][i-1] =  R[2][j][i];

            R[0][j][i-2] =  R[0][j][i+1];
            R[1][j][i-2] = -R[1][j][i+1];
            R[2][j][i-2] =  R[2][j][i+1];

            R[0][j][i-3] =  R[0][j][i+2];
            R[1][j][i-3] = -R[1][j][i+2];
            R[2][j][i-3] =  R[2][j][i+2];
        }
        if (ti == nx_+2) {
            R[0][j][i+1] =  R[0][j][i];
            R[1][j][i+1] = -R[1][j][i];
            R[2][j][i+1] =  R[2][j][i];

            R[0][j][i+2] =  R[0][j][i-1];
            R[1][j][i+2] = -R[1][j][i-1];
            R[2][j][i+2] =  R[2][j][i-1];

            R[0][j][i+3] =  R[0][j][i-2];
            R[1][j][i+3] = -R[1][j][i-2];
            R[2][j][i+3] =  R[2][j][i-2];
        }
        if (tj == 3) {
            R[0][j-1][i] =  R[0][j][i];
            R[1][j-1][i] =  R[1][j][i];
            R[2][j-1][i] = -R[2][j][i];

            R[0][j-2][i] =  R[0][j+1][i];
            R[1][j-2][i] =  R[1][j+1][i];
            R[2][j-2][i] = -R[2][j+1][i];

            R[0][j-3][i] =  R[0][j+2][i];
            R[1][j-3][i] =  R[1][j+2][i];
            R[2][j-3][i] = -R[2][j+2][i];
        }
        if (tj == ny_+2) {
            R[0][j+1][i] =  R[0][j][i];
            R[1][j+1][i] =  R[1][j][i];
            R[2][j+1][i] = -R[2][j][i];

            R[0][j+2][i] =  R[0][j-1][i];
            R[1][j+2][i] =  R[1][j-1][i];
            R[2][j+2][i] = -R[2][j-1][i];

            R[0][j+3][i] =  R[0][j-2][i];
            R[1][j+3][i] =  R[1][j-2][i];
            R[2][j+3][i] = -R[2][j-2][i];
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);



    //Create our "steady state" reconstruction variables (u, v, K, L)
    for (int j=ty; j<block_height+4; j+=get_local_size(1)) {
        const int l = j + 1; //Skip one "ghost cell row" of Q, going from 6x6 to 4x4 "halo"
        for (int i=tx; i<block_width+4; i+=get_local_size(0)) {
            const int k = i + 1;

            const float h = R[0][l][k];
            const float u = R[1][l][k] / h;
            const float v = R[2][l][k] / h;

            const float B = 0.0f;
            //(1, 2, 1)/4 weighted average of the velocity in the neighbouring cells.
            //All literals are single precision so that no double precision
            //arithmetic is introduced into the kernel.
            const float U = 0.25f * f_/g_ * (1.0f*R[1][l+1][k]/R[0][l+1][k] + 2.0f*u + 1.0f*R[1][l-1][k]/R[0][l-1][k]);
            const float V = 0.25f * f_/g_ * (1.0f*R[2][l][k+1]/R[0][l][k+1] + 2.0f*v + 1.0f*R[2][l][k-1]/R[0][l][k-1]);
            //const float U = f_/g_ * u;
            //const float V = f_/g_ * v;
            const float K = h + B - V;
            const float L = h + B + U;

            Q[0][j][i] = u;
            Q[1][j][i] = v;
            Q[2][j][i] = K;
            Q[3][j][i] = L;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);



    //Reconstruct slopes along x axis
    for (int j=ty; j<block_height; j+=get_local_size(1)) {
        const int l = j + 2; //Skip ghost cells
        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            const int k = i + 1;
            for (int p=0; p<4; ++p) {
                Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_);
            }
        }
    }

    //Reconstruct slopes along y axis
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        const int l = j + 1;
        for (int i=tx; i<block_width; i+=get_local_size(0)) {
            const int k = i + 2; //Skip ghost cells
            for (int p=0; p<4; ++p) {
                Qy[p][j][i] = minmodSlope(Q[p][l-1][k], Q[p][l][k], Q[p][l+1][k], theta_);
            }
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);



    //Compute fluxes along the x axis
    for (int j=ty; j<block_height; j+=get_local_size(1)) {
        const int l = j + 2; //Skip ghost cells (be consistent with reconstruction offsets)
        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const int k = i + 1;

            // R=(u, v, K, L) reconstructed at a cell interface from the right (p) and left (m)
            const float4 Rp = (float4)(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
                                       Q[1][l][k+1] - 0.5f*Qx[1][j][i+1],
                                       Q[2][l][k+1] - 0.5f*Qx[2][j][i+1],
                                       Q[3][l][k+1] - 0.5f*Qx[3][j][i+1]);
            const float4 Rm = (float4)(Q[0][l][k  ] + 0.5f*Qx[0][j][i  ],
                                       Q[1][l][k  ] + 0.5f*Qx[1][j][i  ],
                                       Q[2][l][k  ] + 0.5f*Qx[2][j][i  ],
                                       Q[3][l][k  ] + 0.5f*Qx[3][j][i  ]);

            // Variables to reconstruct h from u, v, K, L
            const float vp = Q[1][l][k+1];
            const float vm = Q[1][l][k  ];
            const float V = 0.5f * f_/g_ * (vp + vm);
            const float B = 0.0f;

            // Reconstruct h = K + V - B
            const float hp = Rp.z + V - B;
            const float hm = Rm.z + V - B;

            // Our flux variables Q=(h, u, v)
            const float3 Qp = (float3)(hp, Rp.x, Rp.y);
            const float3 Qm = (float3)(hm, Rm.x, Rm.y);

            // Computed flux
            const float3 flux = CDKLM16_flux(Qm, Qp, g_);
            F[0][j][i] = flux.x;
            F[1][j][i] = flux.y;
            F[2][j][i] = flux.z;
        }
    }

    //Compute fluxes along the y axis
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const int l = j + 1;
        for (int i=tx; i<block_width; i+=get_local_size(0)) {
            const int k = i + 2; //Skip ghost cells
            // Q at interface from the right and left
            const float4 Rp = (float4)(Q[0][l+1][k] - 0.5f*Qy[0][j+1][i],
                                       Q[1][l+1][k] - 0.5f*Qy[1][j+1][i],
                                       Q[2][l+1][k] - 0.5f*Qy[2][j+1][i],
                                       Q[3][l+1][k] - 0.5f*Qy[3][j+1][i]);
            const float4 Rm = (float4)(Q[0][l  ][k] + 0.5f*Qy[0][j  ][i],
                                       Q[1][l  ][k] + 0.5f*Qy[1][j  ][i],
                                       Q[2][l  ][k] + 0.5f*Qy[2][j  ][i],
                                       Q[3][l  ][k] + 0.5f*Qy[3][j  ][i]);

            // Variables to reconstruct h from u, v, K, L
            const float up = Q[0][l+1][k];
            const float um = Q[0][l  ][k];
            const float U = 0.5f * f_/g_ * (up + um);
            const float B = 0.0f;

            // Reconstruct h = L - U - B
            const float hp = Rp.w - U - B;
            const float hm = Rm.w - U - B;

            // Our flux variables Q=(h, v, u)
            // Note that we swap u and v
            const float3 Qp = (float3)(hp, Rp.y, Rp.x);
            const float3 Qm = (float3)(hm, Rm.y, Rm.x);

            // Computed flux
            // Note that we swap back u and v
            const float3 flux = CDKLM16_flux(Qm, Qp, g_);
            G[0][j][i] = flux.x;
            G[1][j][i] = flux.z;
            G[2][j][i] = flux.y;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);



    //Sum fluxes and advance in time for all internal cells
    if (ti > 2 && ti < nx_+3 && tj > 2 && tj < ny_+3) {
        const int i = tx + 3; //Skip local ghost cells, i.e., +3
        const int j = ty + 3;

        const float X = windStressX(
            wind_stress_type_,
            dx_, dy_, dt_,
            tau0_, rho_, alpha_, xm_, Rc_,
            x0_, y0_,
            u0_, v0_,
            t_);
        const float Y = windStressY(
            wind_stress_type_,
            dx_, dy_, dt_,
            tau0_, rho_, alpha_, xm_, Rc_,
            x0_, y0_,
            u0_, v0_,
            t_);

        //Flux differences plus wind stress and Coriolis source terms
        const float h1  = R[0][j][i] + (F[0][ty][tx] - F[0][ty  ][tx+1]) * dt_ / dx_
                                     + (G[0][ty][tx] - G[0][ty+1][tx  ]) * dt_ / dy_;
        const float hu1 = R[1][j][i] + (F[1][ty][tx] - F[1][ty  ][tx+1]) * dt_ / dx_
                                     + (G[1][ty][tx] - G[1][ty+1][tx  ]) * dt_ / dy_
                                     + dt_*X - dt_*f_*R[2][j][i];
        const float hv1 = R[2][j][i] + (F[2][ty][tx] - F[2][ty  ][tx+1]) * dt_ / dx_
                                     + (G[2][ty][tx] - G[2][ty+1][tx  ]) * dt_ / dy_
                                     + dt_*Y + dt_*f_*R[1][j][i];

        __global float* const h_row  = (__global float*) ((__global char*) h1_ptr_ + h1_pitch_*tj);
        __global float* const hu_row = (__global float*) ((__global char*) hu1_ptr_ + hu1_pitch_*tj);
        __global float* const hv_row = (__global float*) ((__global char*) hv1_ptr_ + hv1_pitch_*tj);

        //Bottom friction factor; the momentum is divided by (1 + C) or (1 + C/2) below
        const float C = 2.0f*r_*dt_/R[0][j][i];

        if (step_ == 0) {
            //First step of RK2 ODE integrator

            h_row[ti] = h1;
            hu_row[ti] = hu1 / (1.0f + C);
            hv_row[ti] = hv1 / (1.0f + C);
        }
        else if (step_ == 1) {
            //Second step of RK2 ODE integrator

            //First read Q^n
            const float h_a  = h_row[ti];
            const float hu_a = hu_row[ti];
            const float hv_a = hv_row[ti];

            //Compute Q^n+1
            const float h_b  = 0.5f*(h_a + h1);
            const float hu_b = 0.5f*(hu_a + hu1);
            const float hv_b = 0.5f*(hv_a + hv1);

            //Write to main memory
            h_row[ti] = h_b;
            hu_row[ti] = hu_b / (1.0f + 0.5f*C);
            hv_row[ti] = hv_b / (1.0f + 0.5f*C);
        }
    }

}
|
|
@ -1,195 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
This python module implements the Centered in Time, Centered in Space
|
|
||||||
(leapfrog) numerical scheme for the shallow water equations,
|
|
||||||
described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
#Import packages we need
|
|
||||||
import numpy as np
|
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Class that solves the SW equations using the Centered in time centered in space scheme
|
|
||||||
"""
|
|
||||||
class CTCS:
    """
    Centered in time, centered in space shallow-water simulator which runs
    its eta, U and V kernels through OpenCL.
    """

    def __init__(self,
                 cl_ctx,
                 H, eta0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r, A,
                 wind_stress=Common.WindStressParams(),
                 block_width=16, block_height=16):
        """
        Initialization routine
        cl_ctx: OpenCL context used for the command queue, kernels and device data
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational accelleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^1)
        r: Bottom friction coefficient (2.4e-3 m/s)
        A: Eddy viscosity coefficient (O(dx))
        wind_stress: Wind stress parameters
        block_width: Work-group width, also passed to Common.get_kernel
        block_height: Work-group height, also passed to Common.get_kernel
        """
        self.cl_ctx = cl_ctx

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        reload(Common)

        #Get kernels (built in the order U, V, eta)
        kernel_files = (("u_kernel", "CTCS_U_kernel.opencl"),
                        ("v_kernel", "CTCS_V_kernel.opencl"),
                        ("eta_kernel", "CTCS_eta_kernel.opencl"))
        for attribute, filename in kernel_files:
            setattr(self, attribute, Common.get_kernel(self.cl_ctx, filename, block_width, block_height))

        #Create data by uploading to device, one ghost cell in each direction
        ghost_cells = 1
        self.H = Common.OpenCLArray2D(self.cl_ctx, nx, ny, ghost_cells, ghost_cells, H)
        self.cl_data = Common.SWEDataArkawaC(self.cl_ctx, nx, ny, ghost_cells, ghost_cells, eta0, hu0, hv0)

        #Save input parameters in the dataformat the OpenCL kernels expect:
        #32-bit integers for the cell counts, 32-bit floats for everything else
        for attribute, value in (("nx", nx), ("ny", ny)):
            setattr(self, attribute, np.int32(value))
        for attribute, value in (("dx", dx), ("dy", dy), ("dt", dt),
                                 ("g", g), ("f", f), ("r", r), ("A", A)):
            setattr(self, attribute, np.float32(value))
        self.wind_stress = wind_stress

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        #(global size is the domain size rounded up to a whole number of blocks)
        self.local_size = (block_width, block_height)
        self.global_size = tuple(
            int(np.ceil(cells / float(block)) * block)
            for cells, block in zip((self.nx, self.ny), self.local_size))

    def __str__(self):
        return "Centered in time, centered in space"

    def step(self, t_end=0.0):
        """
        Advances the simulation and returns the new simulation time.
        t_end: Duration to simulate, measured from the time of this call.
               It is split into timesteps of at most self.dt.

        Notation:
            cl_data.u0 => U^{n-1} before U kernel, U^{n+1} after U kernel
            cl_data.u1 => U^{n}
        When we call cl_data.swap(), we swap these, so that
            cl_data.u0 => U^{n}
            cl_data.u1 => U^{n+1} (U kernel has been executed)
        and we are ready for the next time step.
        """
        def pitched(array):
            #Device pointer followed by its pitch, as the kernels expect them
            return (array.data, array.pitch)

        max_steps = int(t_end / self.dt + 1)

        for substep in range(0, max_steps):
            local_dt = np.float32(min(self.dt, t_end-substep*self.dt))

            if (local_dt <= 0.0):
                break

            data = self.cl_data
            wind = self.wind_stress

            #Arguments shared by all three kernels
            launch_and_grid = (self.cl_queue, self.global_size, self.local_size,
                               self.nx, self.ny,
                               self.dx, self.dy, local_dt)
            physics = (self.g, self.f, self.r)
            #Arguments shared by the U and V kernels
            depth = (self.A,) + pitched(self.H)
            wind_and_time = (wind.type,
                             wind.tau0, wind.rho, wind.alpha, wind.xm, wind.Rc,
                             wind.x0, wind.y0,
                             wind.u0, wind.v0,
                             self.t)

            eta_args = launch_and_grid + physics \
                     + pitched(data.h0) \
                     + pitched(data.hu1) \
                     + pitched(data.hv1)
            #Buffers: eta^{n-1} => eta^{n+1}, U^{n}, V^{n}
            self.eta_kernel.computeEtaKernel(*eta_args)

            u_args = launch_and_grid + physics + depth \
                   + pitched(data.h1) \
                   + pitched(data.hu0) \
                   + pitched(data.hu1) \
                   + pitched(data.hv1) \
                   + wind_and_time
            #Buffers: eta^{n}, U^{n-1} => U^{n+1}, U^{n}, V^{n}
            self.u_kernel.computeUKernel(*u_args)

            v_args = launch_and_grid + physics + depth \
                   + pitched(data.h1) \
                   + pitched(data.hu1) \
                   + pitched(data.hv0) \
                   + pitched(data.hv1) \
                   + wind_and_time
            #Buffers: eta^{n}, U^{n}, V^{n-1} => V^{n+1}, V^{n}
            self.v_kernel.computeVKernel(*v_args)

            #After the kernels, swap the data pointers
            self.cl_data.swap()

            self.t += local_dt

        return self.t

    def download(self):
        """
        Returns whatever self.cl_data.download() produces for the command
        queue of this instance.
        """
        return self.cl_data.download(self.cl_queue)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,435 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
This python module implements the Centered in Time, Centered in Space
|
|
||||||
(leapfrog) numerical scheme for the shallow water equations,
|
|
||||||
described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
#Import packages we need
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
import numpy as np
|
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Class that holds data for the SW equations in OpenCL
|
|
||||||
"""
|
|
||||||
class CTCS2LayerDataCL:
|
|
||||||
"""
|
|
||||||
Uploads initial data to the CL device
|
|
||||||
"""
|
|
||||||
def __init__(self, cl_ctx, h1_0, eta1_0, u1_0, v1_0, \
             h2_0, eta2_0, u2_0, v2_0):
    """
    Validate the initial conditions of both layers and upload them to the
    OpenCL device.

    cl_ctx: OpenCL context in which the device buffers are created
    h1_0:   layer 1 array of shape (ny+2, nx+2); its shape defines nx and ny
    eta1_0: layer 1 array of shape (ny+2, nx+2)
    u1_0:   layer 1 array of shape (ny+2, nx+1)
    v1_0:   layer 1 array of shape (ny+1, nx+2)
    h2_0, eta2_0, u2_0, v2_0: same shapes, for the second (deepest) layer

    Arrays that are not float32 or not C-contiguous are converted to a
    float32, C-ordered copy (a "Converting ..." message is printed for each).
    For eta, u and v two device buffers (*_0 and *_1) are created per layer,
    both initialised from the same host array; h gets a single buffer.
    Raises AssertionError if an array does not have the expected shape.
    """

    def as_device_layout(label, data):
        # The pitches computed below assume tightly packed float32 rows, so
        # anything that is not C-contiguous (Fortran order, but also strided
        # views such as slices) has to be copied into that layout.
        needs_copy = (not np.issubdtype(data.dtype, np.float32)) \
                or (not data.flags.c_contiguous)
        if needs_copy:
            print("Converting " + label)
            data = data.astype(np.float32, order='C')
        return data

    #Make sure that the data is single precision floating point
    h1_0 = as_device_layout("H_0", h1_0)
    eta1_0 = as_device_layout("Eta_0", eta1_0)
    u1_0 = as_device_layout("U_0", u1_0)
    v1_0 = as_device_layout("V_0", v1_0)

    #Same for second (deepest) layer
    h2_0 = as_device_layout("H2_0", h2_0)
    eta2_0 = as_device_layout("Eta2_0", eta2_0)
    u2_0 = as_device_layout("U2_0", u2_0)
    v2_0 = as_device_layout("V2_0", v2_0)

    #The domain size follows from h1_0, which carries one ghost cell per side
    rows, cols = h1_0.shape
    self.nx = cols - 2
    self.ny = rows - 2

    cell_shape = (self.ny+2, self.nx+2)
    u_shape = (self.ny+2, self.nx+1)
    v_shape = (self.ny+1, self.nx+2)

    assert(h1_0.shape == cell_shape)
    assert(eta1_0.shape == cell_shape)
    assert(u1_0.shape == u_shape)
    assert(v1_0.shape == v_shape)

    #Same for layer 2
    assert(h2_0.shape == cell_shape)
    assert(eta2_0.shape == cell_shape)
    assert(u2_0.shape == u_shape)
    assert(v2_0.shape == v_shape)

    #Upload data to the device
    mf = cl.mem_flags

    def upload(host_array):
        # Each call creates a new read/write buffer initialised from host_array
        return cl.Buffer(cl_ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=host_array)

    self.h1_0 = upload(h1_0)

    self.eta1_0 = upload(eta1_0)
    self.eta1_1 = upload(eta1_0)

    self.u1_0 = upload(u1_0)
    self.u1_1 = upload(u1_0)

    self.v1_0 = upload(v1_0)
    self.v1_1 = upload(v1_0)

    #Same for layer 2
    self.h2_0 = upload(h2_0)

    self.eta2_0 = upload(eta2_0)
    self.eta2_1 = upload(eta2_0)

    self.u2_0 = upload(u2_0)
    self.u2_1 = upload(u2_0)

    self.v2_0 = upload(v2_0)
    self.v2_1 = upload(v2_0)

    #Compute pitches (size of one row in bytes)
    def pitch(host_array):
        # itemsize is 4 here because every array is float32 at this point
        return np.int32(host_array.shape[1] * host_array.itemsize)

    self.h1_0_pitch = pitch(h1_0)

    self.eta1_0_pitch = pitch(eta1_0)
    self.eta1_1_pitch = pitch(eta1_0)

    self.u1_0_pitch = pitch(u1_0)
    self.u1_1_pitch = pitch(u1_0)

    self.v1_0_pitch = pitch(v1_0)
    self.v1_1_pitch = pitch(v1_0)

    #Same for layer 2
    self.h2_0_pitch = pitch(h2_0)

    self.eta2_0_pitch = pitch(eta2_0)
    self.eta2_1_pitch = pitch(eta2_0)

    self.u2_0_pitch = pitch(u2_0)
    self.u2_1_pitch = pitch(u2_0)

    self.v2_0_pitch = pitch(v2_0)
    self.v2_1_pitch = pitch(v2_0)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Swaps the variables after a timestep has been completed
|
|
||||||
"""
|
|
||||||
def swap(self):
    """
    Exchange the *_0 and *_1 buffers of eta, u and v for both layers.

    Only the buffer attributes are exchanged; the h buffers and the pitch
    attributes are left untouched.
    """
    for layer in ("1", "2"):
        for field in ("eta", "u", "v"):
            first = field + layer + "_0"
            second = field + layer + "_1"
            held = getattr(self, first)
            setattr(self, first, getattr(self, second))
            setattr(self, second, held)
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Enables downloading data from CL device to Python
|
|
||||||
"""
|
|
||||||
def download(self, cl_queue):
    """
    Copy the *_1 buffers of both layers from the CL device into new host arrays.

    cl_queue: OpenCL command queue on which the copies are enqueued

    Returns the tuple (eta1_1, u1_1, v1_1, eta2_1, u2_1, v2_1) of float32,
    C-ordered numpy arrays with shapes (ny+2, nx+2) for eta, (ny+2, nx+1)
    for u and (ny+1, nx+2) for v.
    """
    eta_shape = (self.ny+2, self.nx+2)
    u_shape = (self.ny+2, self.nx+1)
    v_shape = (self.ny+1, self.nx+2)

    # Device attribute name and host shape, in the order they are returned
    layout = (
        ("eta1_1", eta_shape), ("u1_1", u_shape), ("v1_1", v_shape),
        ("eta2_1", eta_shape), ("u2_1", u_shape), ("v2_1", v_shape),
    )

    #Allocate data on the host for result
    host_arrays = [np.empty(shape, dtype=np.float32, order='C') for _, shape in layout]

    #Copy data from device to host
    for (attribute, _), target in zip(layout, host_arrays):
        cl.enqueue_copy(cl_queue, target, getattr(self, attribute))

    return tuple(host_arrays)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Class that solves the two-layer shallow water (SW) equations using the centered in time, centered in space (CTCS, leapfrog) scheme
|
|
||||||
"""
|
|
||||||
class CTCS2Layer:
    """
    Two-layer shallow water solver that runs the CTCS2Layer eta, U and V
    OpenCL kernels on data held in a CTCS2LayerDataCL instance.
    """

    def __init__(self,
                 h1_0, eta1_0, u1_0, v1_0,
                 h2_0, eta2_0, u2_0, v2_0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r1, r2, A,
                 rho1, rho2,
                 wind_type=99, # "no wind"
                 wind_tau0=0, wind_alpha=0, wind_xm=0, wind_Rc=0,
                 wind_x0=0, wind_y0=0,
                 wind_u0=0, wind_v0=0):
        """
        Initialization routine

        h1_0: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta1_0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        u1_0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        v1_0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        h2_0: Water depth (layer 2) incl ghost cells, (nx+2)*(ny+2) cells
        eta2_0: Initial deviation from mean sea level (layer 2) incl ghost cells, (nx+2)*(ny+2) cells
        u2_0: Initial momentum (layer 2) along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        v2_0: Initial momentum (layer 2) along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1)
        r1: Friction coefficient passed to the kernels as r1_, which the U
            kernel uses as the inter-layer friction coefficient (m/s)
        r2: Friction coefficient passed to the kernels as r2_, which the U
            kernel uses as the bottom friction coefficient (m/s)
        A: Eddy viscosity coefficient (O(dx))
        rho1: Density of upper layer (kg / m^3); must satisfy rho1 <= rho2
        rho2: Density of lower layer (kg / m^3)
        wind_type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
                   (the default, 99, means "no wind")
        wind_tau0: Amplitude of wind stress (Pa)
        wind_alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
        wind_xm: Maximum wind stress for bell shaped wind stress
        wind_Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
        wind_x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
        wind_y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
        wind_u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
        wind_v0: Translation speed along y for moving cyclone (-0.5*u0)
        """
        #Make sure we get compiler output from OpenCL
        os.environ["PYOPENCL_COMPILER_OUTPUT"] = "1"

        #Set which CL device to use
        #NOTE: this overwrites any PYOPENCL_CTX value already in the environment
        os.environ["PYOPENCL_CTX"] = "1"

        #Create OpenCL context
        self.cl_ctx = cl.create_some_context()
        #Two spaces reproduce the output of the original
        #'print "Using ", name' statement; the call form also parses on Python 3
        print("Using  %s" % (self.cl_ctx.devices[0].name,))

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.u_kernel = self.get_kernel("CTCS2Layer_U_kernel.opencl")
        self.v_kernel = self.get_kernel("CTCS2Layer_V_kernel.opencl")
        self.eta_kernel = self.get_kernel("CTCS2Layer_eta_kernel.opencl")

        #Create data by uploading to device
        self.cl_data = CTCS2LayerDataCL(self.cl_ctx, h1_0, eta1_0, u1_0, v1_0, h2_0, eta2_0, u2_0, v2_0)

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.f = np.float32(f)
        self.r1 = np.float32(r1)
        self.r2 = np.float32(r2)
        self.A = np.float32(A)
        assert(rho1 <= rho2)
        self.rho1 = np.float32(rho1)
        self.rho2 = np.float32(rho2)
        self.wind_type = np.int32(wind_type)
        self.wind_tau0 = np.float32(wind_tau0)
        self.wind_alpha = np.float32(wind_alpha)
        self.wind_xm = np.float32(wind_xm)
        self.wind_Rc = np.float32(wind_Rc)
        self.wind_x0 = np.float32(wind_x0)
        self.wind_y0 = np.float32(wind_y0)
        self.wind_u0 = np.float32(wind_u0)
        self.wind_v0 = np.float32(wind_v0)

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        self.local_size = (8, 8) # WARNING::: MUST MATCH defines of block_width/height in kernels!
        #Round the domain size up to a whole number of work-groups
        self.global_size = (
                int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]),
                int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1])
                )

    def step(self, t_end=0.0):
        """
        Advance the simulation by t_end, in timesteps of at most dt.

        The last timestep is shortened so that exactly t_end is covered.
        Returns the accumulated simulation time self.t after stepping.
        """
        n = int(t_end / self.dt + 1)

        #Arguments that do not change between timesteps
        launch_args = (self.cl_queue, self.global_size, self.local_size)
        physics_args = (
                self.g, self.f,
                self.r1, self.r2,
                self.A,
                self.rho1, self.rho2)
        wind_args = (
                self.wind_type,
                self.wind_tau0, self.wind_alpha, self.wind_xm, self.wind_Rc,
                self.wind_x0, self.wind_y0,
                self.wind_u0, self.wind_v0)

        for i in range(0, n):
            #Notation:
            # cl_data.u0 => U^{n-1} before U kernel, U^{n+1} after U kernel
            # cl_data.u1 => U^{n}
            # When we call cl_data.swap(), we swap these, so that
            # cl_data.u0 => U^{n}
            # cl_data.u1 => U^{n+1} (U kernel has been executed)
            # Now we are ready for the next time step

            local_dt = np.float32(min(self.dt, t_end-i*self.dt))

            if (local_dt <= 0.0):
                break

            #The buffers are swapped every iteration, so the data arguments
            #have to be looked up again for each timestep
            data = self.cl_data
            grid_args = (
                    self.nx, self.ny,
                    self.dx, self.dy, local_dt)
            time_args = (self.t,)

            eta_data_args = (
                    data.eta1_0, data.eta1_0_pitch, # eta^{n-1} => eta^{n+1}
                    data.u1_1, data.u1_1_pitch,     # U^{n}
                    data.v1_1, data.v1_1_pitch,     # V^{n}

                    data.eta2_0, data.eta2_0_pitch,
                    data.u2_1, data.u2_1_pitch,
                    data.v2_1, data.v2_1_pitch)
            self.eta_kernel.computeEtaKernel(*(launch_args + grid_args + eta_data_args))

            u_data_args = (
                    data.h1_0, data.h1_0_pitch,
                    data.eta1_1, data.eta1_1_pitch, # eta^{n}
                    data.u1_0, data.u1_0_pitch,     # U^{n-1} => U^{n+1}
                    data.u1_1, data.u1_1_pitch,     # U^{n}
                    data.v1_1, data.v1_1_pitch,     # V^{n}

                    data.h2_0, data.h2_0_pitch,
                    data.eta2_1, data.eta2_1_pitch,
                    data.u2_0, data.u2_0_pitch,
                    data.u2_1, data.u2_1_pitch,
                    data.v2_1, data.v2_1_pitch)
            self.u_kernel.computeUKernel(*(launch_args + grid_args + physics_args
                    + u_data_args + wind_args + time_args))

            v_data_args = (
                    data.h1_0, data.h1_0_pitch,
                    data.eta1_1, data.eta1_1_pitch, # eta^{n}
                    data.u1_1, data.u1_1_pitch,     # U^{n}
                    data.v1_0, data.v1_0_pitch,     # V^{n-1} => V^{n+1}
                    data.v1_1, data.v1_1_pitch,     # V^{n}

                    data.h2_0, data.h2_0_pitch,
                    data.eta2_1, data.eta2_1_pitch,
                    data.u2_1, data.u2_1_pitch,
                    data.v2_0, data.v2_0_pitch,
                    data.v2_1, data.v2_1_pitch)
            self.v_kernel.computeVKernel(*(launch_args + grid_args + physics_args
                    + v_data_args + wind_args + time_args))

            #After the kernels, swap the data pointers
            self.cl_data.swap()

            self.t += local_dt

        return self.t

    def get_kernel(self, kernel_filename):
        """
        Read an OpenCL source file located next to this module and build it
        for this solver's context.

        kernel_filename: File name of the kernel source, relative to the
                         directory containing this module
        Returns the built OpenCL program.
        """
        #Read the proper program
        module_path = os.path.dirname(os.path.realpath(__file__))
        fullpath = os.path.join(module_path, kernel_filename)
        with open(fullpath, "r") as kernel_file:
            kernel_string = kernel_file.read()
            kernel = cl.Program(self.cl_ctx, kernel_string).build()

        return kernel

    def download(self):
        """
        Download the current state from the device; see CTCS2LayerDataCL.download.
        """
        return self.cl_data.download(self.cl_queue)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,414 +0,0 @@
|
|||||||
/**
|
|
||||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
|
||||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
|
||||||
described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#define block_height 8
|
|
||||||
#define block_width 8
|
|
||||||
|
|
||||||
|
|
||||||
typedef __local float eta_shmem[block_height+2][block_width+1];
|
|
||||||
typedef __local float u_shmem[block_height+2][block_width+2];
|
|
||||||
typedef __local float v_shmem[block_height+1][block_width+1];
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Returns the x component of the wind stress term for the calling work-item,
 * selected by wind_stress_type_:
 *   0: uniform along shore, decaying exponentially in y
 *   1: bell shaped along shore, active only while t_ <= 48 hours
 *   2: moving cyclone, evaluated at time t_+dt_
 * Any other type yields 0.
 */
float windStressX(int wind_stress_type_,
        float dx_, float dy_, float dt_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    if (wind_stress_type_ == 0) {
        // UNIFORM_ALONGSHORE
        const float pos_y = (get_global_id(1)+0.5f)*dy_;
        return tau0_/rho_ * exp(-alpha_*pos_y);
    }

    if (wind_stress_type_ == 1) {
        // BELL_SHAPED_ALONGSHORE: no stress once the first 48 hours have passed
        if (!(t_ <= 48.0f*3600.0f)) {
            return 0.0f;
        }
        const float scaled_x = alpha_*((get_global_id(0)+0.5f)*dx_-xm_);
        const float scaled_x_sq = scaled_x*scaled_x;
        const float pos_y = (get_global_id(1)+0.5f)*dy_;
        return tau0_/rho_ * exp(-scaled_x_sq) * exp(-alpha_*pos_y);
    }

    if (wind_stress_type_ == 2) {
        // MOVING_CYCLONE: offset of this work-item from the cyclone centre
        const float pos_x = (get_global_id(0))*dx_;
        const float pos_y = (get_global_id(1)+0.5f)*dy_;
        const float off_x = (pos_x-x0_-u0_*(t_+dt_));
        const float off_x_sq = off_x*off_x;
        const float off_y = (pos_y-y0_-v0_*(t_+dt_));
        const float off_y_sq = off_y*off_y;
        const float radius = sqrt(off_x_sq+off_y_sq);
        const float rel = 1.0f - radius/Rc_;
        const float rel_sq = rel*rel;

        return -(tau0_/rho_) * (off_y/Rc_) * exp(-0.5f*rel_sq);
    }

    return 0.0f;
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Kernel that evolves U one step in time for both layers.
 *
 * Each work-item handles one U value per layer: it reads U^{n-1} from the
 * U*_0 buffers, combines it with the time-level-n fields (H, eta, U, V)
 * staged in local memory, and writes U^{n+1} back into the U*_0 buffers.
 * All pitches are in bytes (rows are addressed through char pointers).
 * The local memory tiles are sized by block_width/block_height, so the
 * work-group size has to match those defines.
 */
__kernel void computeUKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r1_, //< Inter-layer friction coefficient
        float r2_, //< Bottom friction coefficient

        //Numerical diffusion
        float A_,

        //Density of each layer
        float rho1_,
        float rho2_,

        //Data for layer 1
        __global float* H1_ptr_, int H1_pitch_,
        __global float* eta1_1_ptr_, int eta1_1_pitch_, // eta^n
        __global float* U1_0_ptr_, int U1_0_pitch_, // U^n-1, also output, U^n+1
        __global float* U1_1_ptr_, int U1_1_pitch_, // U^n
        __global float* V1_1_ptr_, int V1_1_pitch_, // V^n

        //Data for layer 2
        __global float* H2_ptr_, int H2_pitch_,
        __global float* eta2_1_ptr_, int eta2_1_pitch_, // eta^n
        __global float* U2_0_ptr_, int U2_0_pitch_, // U^n-1, also output, U^n+1
        __global float* U2_1_ptr_, int U2_1_pitch_, // U^n
        __global float* V2_1_ptr_, int V2_1_pitch_, // V^n

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Local memory tiles for layer 1
    eta_shmem H1_shared;
    eta_shmem eta1_shared;
    u_shmem U1_shared;
    v_shmem V1_shared;

    //Local memory tiles for layer 2
    eta_shmem H2_shared;
    eta_shmem eta2_shared;
    u_shmem U2_shared;
    v_shmem V2_shared;

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Start of block within domain
    const int bx = get_local_size(0) * get_group_id(0) + 1; //Skip global ghost cells
    const int by = get_local_size(1) * get_group_id(1) + 1; //Skip global ghost cells

    //Index of cell within domain
    const int ti = bx + tx;
    const int tj = by + ty;

    //Compute pointer to current row in the U array
    //(only dereferenced inside the bounds checks below)
    __global float* const U1_0_row = (__global float*) ((__global char*) U1_0_ptr_ + U1_0_pitch_*tj);
    __global float* const U2_0_row = (__global float*) ((__global char*) U2_0_ptr_ + U2_0_pitch_*tj);

    //Read U^{n-1}; work-items outside the guarded range keep 0
    float U1_0 = 0.0f;
    float U2_0 = 0.0f;
    if (ti > 0 && ti < nx_ && tj > 0 && tj < ny_+1) {
        U1_0 = U1_0_row[ti];
        U2_0 = U2_0_row[ti];
    }

    //Read H and eta into shared memory: (block_width+1)*(block_height+2) cells
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // "fake" global ghost cells by clamping
        const int l = clamp(by + j - 1, 1, ny_);

        //Compute the pointer to current row in the H and eta arrays
        __global float* const H1_row = (__global float*) ((__global char*) H1_ptr_ + H1_pitch_*l);
        __global float* const H2_row = (__global float*) ((__global char*) H2_ptr_ + H2_pitch_*l);

        __global float* const eta1_1_row = (__global float*) ((__global char*) eta1_1_ptr_ + eta1_1_pitch_*l);
        __global float* const eta2_1_row = (__global float*) ((__global char*) eta2_1_ptr_ + eta2_1_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // "fake" global ghost cells by clamping
            const int k = clamp(bx + i, 1, nx_);

            H1_shared[j][i] = H1_row[k];
            H2_shared[j][i] = H2_row[k];

            eta1_shared[j][i] = eta1_1_row[k];
            eta2_shared[j][i] = eta2_1_row[k];
        }
    }

    //Read U^n into shared memory: (block_width+2)*(block_height+2) cells
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // "fake" ghost cells by clamping
        const int l = clamp(by + j - 1, 1, ny_);

        //Compute the pointer to current row in the U array
        __global float* const U1_1_row = (__global float*) ((__global char*) U1_1_ptr_ + U1_1_pitch_*l);
        __global float* const U2_1_row = (__global float*) ((__global char*) U2_1_ptr_ + U2_1_pitch_*l);

        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            // Prevent out-of-bounds
            const int k = clamp(bx + i - 1, 0, nx_);

            U1_shared[j][i] = U1_1_row[k];
            U2_shared[j][i] = U2_1_row[k];
        }
    }

    //Read V^n into shared memory: (block_width+1)*(block_height+1) cells
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        // Prevent out-of-bounds
        const int l = clamp(by + j - 1, 0, ny_);

        //Compute the pointer to current row in the V array
        __global float* const V1_1_row = (__global float*) ((__global char*) V1_1_ptr_ + V1_1_pitch_*l);
        __global float* const V2_1_row = (__global float*) ((__global char*) V2_1_ptr_ + V2_1_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // "fake" ghost cells by clamping
            const int k = clamp(bx + i, 1, nx_);

            V1_shared[j][i] = V1_1_row[k];
            V2_shared[j][i] = V2_1_row[k];
        }
    }

    //Make sure all threads have read into shared mem
    barrier(CLK_LOCAL_MEM_FENCE);

    /**
      * Now get all our required variables as short-hands
      * here we use the notation of
      *  Var1_00 as var_i,j for layer 1
      *  Var2_p0 as var_i+1,j for layer 2
      *  Var1_0m as var_i,j-1 for layer 1
      *  etc
      */
    //Layer 1
    const float U1_00 = U1_shared[ty+1][tx+1]; //U at "center"
    const float U1_0p = U1_shared[ty+2][tx+1]; //U at "north"
    const float U1_0m = U1_shared[ty  ][tx+1]; //U at "south"
    const float U1_p0 = U1_shared[ty+1][tx+2]; //U at "east"
    const float U1_m0 = U1_shared[ty+1][tx  ]; //U at "west"

    const float V1_00 = V1_shared[ty+1][tx  ];
    const float V1_p0 = V1_shared[ty+1][tx+1];
    const float V1_0m = V1_shared[ty  ][tx  ];
    const float V1_pm = V1_shared[ty  ][tx+1];

    const float H1_0m = H1_shared[ty  ][tx  ];
    const float H1_00 = H1_shared[ty+1][tx  ];
    const float H1_0p = H1_shared[ty+2][tx  ];
    const float H1_pm = H1_shared[ty  ][tx+1];
    const float H1_p0 = H1_shared[ty+1][tx+1];
    const float H1_pp = H1_shared[ty+2][tx+1];

    const float eta1_0m = eta1_shared[ty  ][tx  ];
    const float eta1_00 = eta1_shared[ty+1][tx  ];
    const float eta1_0p = eta1_shared[ty+2][tx  ];
    const float eta1_pm = eta1_shared[ty  ][tx+1];
    const float eta1_p0 = eta1_shared[ty+1][tx+1];
    const float eta1_pp = eta1_shared[ty+2][tx+1];

    //Layer 2 (bottom)
    const float U2_00 = U2_shared[ty+1][tx+1];
    const float U2_0p = U2_shared[ty+2][tx+1];
    const float U2_0m = U2_shared[ty  ][tx+1];
    const float U2_p0 = U2_shared[ty+1][tx+2];
    const float U2_m0 = U2_shared[ty+1][tx  ];

    const float V2_00 = V2_shared[ty+1][tx  ];
    const float V2_p0 = V2_shared[ty+1][tx+1];
    const float V2_0m = V2_shared[ty  ][tx  ];
    const float V2_pm = V2_shared[ty  ][tx+1];

    const float H2_0m = H2_shared[ty  ][tx  ];
    const float H2_00 = H2_shared[ty+1][tx  ];
    const float H2_0p = H2_shared[ty+2][tx  ];
    const float H2_pm = H2_shared[ty  ][tx+1];
    const float H2_p0 = H2_shared[ty+1][tx+1];
    const float H2_pp = H2_shared[ty+2][tx+1];

    const float eta2_0m = eta2_shared[ty  ][tx  ];
    const float eta2_00 = eta2_shared[ty+1][tx  ];
    const float eta2_0p = eta2_shared[ty+2][tx  ];
    const float eta2_pm = eta2_shared[ty  ][tx+1];
    const float eta2_p0 = eta2_shared[ty+1][tx+1];
    const float eta2_pp = eta2_shared[ty+2][tx+1];

    //Reconstruct Eta_bar as the average of the four surrounding eta values
    //(_0m: below the U point, _00: above the U point)
    const float eta1_bar_0m = 0.25f*(eta1_0m + eta1_pm + eta1_00 + eta1_p0);
    const float eta1_bar_00 = 0.25f*(eta1_00 + eta1_p0 + eta1_0p + eta1_pp);

    const float eta2_bar_0m = 0.25f*(eta2_0m + eta2_pm + eta2_00 + eta2_p0);
    const float eta2_bar_00 = 0.25f*(eta2_00 + eta2_p0 + eta2_0p + eta2_pp);

    //Reconstruct H_bar (four-point averages) and H_x (at the U position)
    const float H1_bar_0m = 0.25f*(H1_0m + H1_pm + H1_00 + H1_p0);
    const float H1_bar_00 = 0.25f*(H1_00 + H1_p0 + H1_0p + H1_pp);
    const float H1_x = 0.5f*(H1_00 + H1_p0);

    const float H2_bar_0m = 0.25f*(H2_0m + H2_pm + H2_00 + H2_p0);
    const float H2_bar_00 = 0.25f*(H2_00 + H2_p0 + H2_0p + H2_pp);
    const float H2_x = 0.5f*(H2_00 + H2_p0);

    //Compute layer thickness of top layer (H1 + eta1 - eta2) ...
    const float h1_p0 = H1_p0 + eta1_p0 - eta2_p0;
    const float h1_00 = H1_00 + eta1_00 - eta2_00;
    const float h1_bar_0m = H1_bar_0m + eta1_bar_0m - eta2_bar_0m;
    const float h1_bar_00 = H1_bar_00 + eta1_bar_00 - eta2_bar_00;

    //... and of the bottom layer (H2 + eta2)
    const float h2_p0 = H2_p0 + eta2_p0;
    const float h2_00 = H2_00 + eta2_00;
    const float h2_bar_0m = H2_bar_0m + eta2_bar_0m;
    const float h2_bar_00 = H2_bar_00 + eta2_bar_00;

    //Compute pressure components
    const float h1_x = 0.5f*(h1_p0 + h1_00);
    const float h2_x = 0.5f*(h2_p0 + h2_00);

    //const float epsilon = (rho2_ - rho1_)/rho2_;
    //const float P1_x = -g_*h1_x * (eta1_p0 - eta1_00 + h2_p0 - h2_00) * (1.0f - epsilon);
    //const float P2_x = -g_*h2_x * (eta2_p0 - eta2_00 + H2_p0 - H2_00);

    const float P1_x = - g_*h1_x*(eta1_p0 - eta1_00) - 0.5f*g_*(eta1_p0*eta1_p0 - eta1_00*eta1_00);
    //P2_x weights the two contributions by rho1/rho2 and (rho2-rho1)/rho2
    const float P2_x = - g_ * (rho1_/rho2_) *
                ( //Pressure contribution from top layer
                    h2_x*(eta1_p0 - eta1_00) + 0.5f*(eta1_p0*eta1_p0 - eta1_00*eta1_00)
                )
            - g_ * ((rho2_ - rho1_)/rho2_) *
                ( //Pressure contribution from bottom layer
                    h2_x*(eta2_p0 - eta2_00) + 0.5f*(eta2_p0*eta2_p0 - eta2_00*eta2_00)
                );

    //Reconstruct V at the U position
    const float V1_bar = 0.25f*(V1_0m + V1_00 + V1_pm + V1_p0);
    const float V2_bar = 0.25f*(V2_0m + V2_00 + V2_pm + V2_p0);

    //Calculate the bottom and/or inter-layer friction coefficient
    //FIXME: Should this be h instead of H?
    const float C1 = r1_/H1_x;
    const float C2 = r2_/H2_x;

    //Calculate numerical diffusion / subgrid energy loss coefficient
    //(D only enters the update through the denominators 1 + D below)
    const float D = 2.0f*A_*dt_*(dx_*dx_ + dy_*dy_)/(dx_*dx_*dy_*dy_);

    //Calculate nonlinear effects
    const float N1_a = (U1_p0 + U1_00)*(U1_p0 + U1_00) / (h1_p0);
    const float N1_b = (U1_00 + U1_m0)*(U1_00 + U1_m0) / (h1_00);
    const float N1_c = (U1_0p + U1_00)*(V1_p0 + V1_00) / (h1_bar_00);
    const float N1_d = (U1_00 + U1_0m)*(V1_pm + V1_0m) / (h1_bar_0m);
    const float N1 = 0.25f*( N1_a - N1_b + (dx_/dy_)*(N1_c - N1_d) );

    const float N2_a = (U2_p0 + U2_00)*(U2_p0 + U2_00) / (h2_p0);
    const float N2_b = (U2_00 + U2_m0)*(U2_00 + U2_m0) / (h2_00);
    const float N2_c = (U2_0p + U2_00)*(V2_p0 + V2_00) / (h2_bar_00);
    const float N2_d = (U2_00 + U2_0m)*(V2_pm + V2_0m) / (h2_bar_0m);
    const float N2 = 0.25f*( N2_a - N2_b + (dx_/dy_)*(N2_c - N2_d) );

    //Calculate eddy viscosity terms
    //Note: the centre value used here is U^{n-1} (U*_0), the neighbours are U^n
    const float E1 = (U1_p0 - U1_0 + U1_m0)/(dx_*dx_) + (U1_0p - U1_0 + U1_0m)/(dy_*dy_);
    const float E2 = (U2_p0 - U2_0 + U2_m0)/(dx_*dx_) + (U2_0p - U2_0 + U2_0m)/(dy_*dy_);

    //Calculate the wind shear stress for the top layer
    const float X = windStressX(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho1_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute U at the next timestep
    //Both layers use C1 to couple to the other layer's U^{n-1};
    //only layer 2 has the C2 friction term in its denominator,
    //and only layer 1 receives the wind stress X
    float U1_2 = (U1_0 + 2.0f*dt_*(f_*V1_bar + (N1 + P1_x)/dx_ + X + C1*U2_0 + A_*E1) ) / (1.0f + D);
    float U2_2 = (U2_0 + 2.0f*dt_*(f_*V2_bar + (N2 + P2_x)/dx_ + C1*U1_0 + A_*E2) ) / (1.0f + 2.0f*dt_*C2 + D);

    //Write to main memory for internal cells
    if (ti > 0 && ti < nx_ && tj > 0 && tj < ny_+1) {
        U1_0_row[ti] = U1_2;
        U2_0_row[ti] = U2_2;
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,395 +0,0 @@
|
|||||||
/**
|
|
||||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
|
||||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
|
||||||
described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Tile size of one work-group. The defaults can be overridden at build time
// (e.g. -D block_height=16 -D block_width=16). The kernels in this file index
// the tiles below by local id, so the launch's work-group size must not
// exceed these dimensions.
#ifndef block_height
#define block_height 8
#endif
#ifndef block_width
#define block_width 8
#endif

// Local-memory tiles: one block of cells plus the halo the V stencil reads.
// eta_shmem holds H and eta, u_shmem holds U, v_shmem holds V.
typedef __local float eta_shmem[block_height+1][block_width+2];
typedef __local float u_shmem[block_height+1][block_width+1];
typedef __local float v_shmem[block_height+2][block_width+2];
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Wind stress in the y-direction for the calling work-item.
 *
 * Only wind_stress_type_ == 2 (moving cyclone) produces a non-zero value;
 * any other type yields 0. alpha_ and xm_ are accepted but not read here.
 * The position is derived from the work-item's global id.
 */
float windStressY(int wind_stress_type_,
        float dx_, float dy_, float dt_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {
    //Everything except the moving cyclone contributes no stress
    if (wind_stress_type_ != 2) {
        return 0.0f;
    }

    //Position belonging to this work-item
    const float x = (get_global_id(0)+0.5f)*dx_;
    const float y = (get_global_id(1))*dy_;

    //Offset from the cyclone centre, which started at (x0_, y0_) and has
    //moved with velocity (u0_, v0_) until time t_+dt_
    const float t_next = t_+dt_;
    const float a = (x-x0_-u0_*t_next);
    const float b = (y-y0_-v0_*t_next);
    const float dist = sqrt(a*a+b*b);

    //Squared, normalised distance from the radius Rc_
    const float c = 1.0f - dist/Rc_;
    const float xi = c*c;

    return (tau0_/rho_) * (a/Rc_) * exp(-0.5f*xi);
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Two-layer kernel that evolves V one step in time.
 *
 * For both layers V^n-1 is read from V*_0 and overwritten with V^n+1, using
 * H, eta^n, U^n and V^n as input. Each work-group first copies the cells it
 * needs (block plus halo) into local memory, with indices clamped so that
 * no read leaves the arrays, and then every work-item updates one cell.
 * All *_pitch_ arguments are row strides in bytes.
 */
__kernel void computeVKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r1_, //< Inter-layer friction coefficient
        float r2_, //< Bottom friction coefficient

        //Numerical diffusion
        float A_,

        //Density of each layer
        float rho1_,
        float rho2_,

        //Data for layer 1
        __global float* H1_ptr_, int H1_pitch_,
        __global float* eta1_1_ptr_, int eta1_1_pitch_, // eta^n
        __global float* U1_1_ptr_, int U1_1_pitch_, // U^n
        __global float* V1_0_ptr_, int V1_0_pitch_, // V^n-1, also output V^n+1
        __global float* V1_1_ptr_, int V1_1_pitch_, // V^n

        //Data for layer 2
        __global float* H2_ptr_, int H2_pitch_,
        __global float* eta2_1_ptr_, int eta2_1_pitch_,
        __global float* U2_1_ptr_, int U2_1_pitch_,
        __global float* V2_0_ptr_, int V2_0_pitch_,
        __global float* V2_1_ptr_, int V2_1_pitch_,

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Local-memory tiles (block plus halo) for each field and layer
    eta_shmem H1_tile;
    eta_shmem eta1_tile;
    u_shmem U1_tile;
    v_shmem V1_tile;

    eta_shmem H2_tile;
    eta_shmem eta2_tile;
    u_shmem U2_tile;
    v_shmem V2_tile;

    //Position of this work-item inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Work-group extent, used as stride by the cooperative tile loads
    const int stride_x = get_local_size(0);
    const int stride_y = get_local_size(1);

    //First cell of this block; the +1 skips the global ghost cells
    const int bx = get_local_size(0) * get_group_id(0) + 1;
    const int by = get_local_size(1) * get_group_id(1) + 1;

    //Cell handled by this work-item
    const int ti = bx + tx;
    const int tj = by + ty;

    //Only these cells are read from and written back to the V^n-1 arrays
    const bool updates_cell = (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_);

    //Rows of the V^n-1 / V^n+1 arrays that this work-item touches
    __global float* const V1_0_row = (__global float*) ((__global char*) V1_0_ptr_ + V1_0_pitch_*tj);
    __global float* const V2_0_row = (__global float*) ((__global char*) V2_0_ptr_ + V2_0_pitch_*tj);

    //V^n-1 for this cell; work-items outside the updated range use zero
    const float V1_0 = updates_cell ? V1_0_row[ti] : 0.0f;
    const float V2_0 = updates_cell ? V2_0_row[ti] : 0.0f;

    //H, eta and U share the same tile rows: block_height+1 of them
    for (int j=ty; j<block_height+1; j+=stride_y) {
        // "fake" global ghost cells by clamping the row
        const int row = clamp(by + j, 1, ny_);

        __global float* const H1_row = (__global float*) ((__global char*) H1_ptr_ + H1_pitch_*row);
        __global float* const H2_row = (__global float*) ((__global char*) H2_ptr_ + H2_pitch_*row);
        __global float* const eta1_1_row = (__global float*) ((__global char*) eta1_1_ptr_ + eta1_1_pitch_*row);
        __global float* const eta2_1_row = (__global float*) ((__global char*) eta2_1_ptr_ + eta2_1_pitch_*row);
        __global float* const U1_1_row = (__global float*) ((__global char*) U1_1_ptr_ + U1_1_pitch_*row);
        __global float* const U2_1_row = (__global float*) ((__global char*) U2_1_ptr_ + U2_1_pitch_*row);

        //H and eta: block_width+2 columns, ghost cells faked by clamping
        for (int i=tx; i<block_width+2; i+=stride_x) {
            const int col = clamp(bx + i - 1, 1, nx_);

            H1_tile[j][i] = H1_row[col];
            H2_tile[j][i] = H2_row[col];

            eta1_tile[j][i] = eta1_1_row[col];
            eta2_tile[j][i] = eta2_1_row[col];
        }

        //U: block_width+1 columns, clamped only to prevent out-of-bounds
        for (int i=tx; i<block_width+1; i+=stride_x) {
            const int col = clamp(bx + i - 1, 0, nx_);

            U1_tile[j][i] = U1_1_row[col];
            U2_tile[j][i] = U2_1_row[col];
        }
    }

    //V: (block_width+2) x (block_height+2) cells
    for (int j=ty; j<block_height+2; j+=stride_y) {
        // Prevent out-of-bounds
        const int row = clamp(by + j - 1, 0, ny_);

        __global float* const V1_1_row = (__global float*) ((__global char*) V1_1_ptr_ + V1_1_pitch_*row);
        __global float* const V2_1_row = (__global float*) ((__global char*) V2_1_ptr_ + V2_1_pitch_*row);

        for (int i=tx; i<block_width+2; i+=stride_x) {
            // "fake" ghost cells by clamping
            const int col = clamp(bx + i - 1, 1, nx_);

            V1_tile[j][i] = V1_1_row[col];
            V2_tile[j][i] = V2_1_row[col];
        }
    }

    //All tiles must be complete before any work-item reads its stencil
    barrier(CLK_LOCAL_MEM_FENCE);

    /**
      * Stencil short-hands, using the notation
      *   Var_00 = var_i,j     Var_p0 = var_i+1,j     Var_0m = var_i,j-1
      * and so on.
      */
    //V^n: center, north, south, east, west
    const float V1_00 = V1_tile[ty+1][tx+1];
    const float V1_0p = V1_tile[ty+2][tx+1];
    const float V1_0m = V1_tile[ty  ][tx+1];
    const float V1_p0 = V1_tile[ty+1][tx+2];
    const float V1_m0 = V1_tile[ty+1][tx  ];

    const float V2_00 = V2_tile[ty+1][tx+1];
    const float V2_0p = V2_tile[ty+2][tx+1];
    const float V2_0m = V2_tile[ty  ][tx+1];
    const float V2_p0 = V2_tile[ty+1][tx+2];
    const float V2_m0 = V2_tile[ty+1][tx  ];

    //U^n at the four neighbouring positions
    const float U1_00 = U1_tile[ty  ][tx+1];
    const float U1_0p = U1_tile[ty+1][tx+1];
    const float U1_m0 = U1_tile[ty  ][tx  ];
    const float U1_mp = U1_tile[ty+1][tx  ];

    const float U2_00 = U2_tile[ty  ][tx+1];
    const float U2_0p = U2_tile[ty+1][tx+1];
    const float U2_m0 = U2_tile[ty  ][tx  ];
    const float U2_mp = U2_tile[ty+1][tx  ];

    //H at the six surrounding cells
    const float H1_m0 = H1_tile[ty  ][tx  ];
    const float H1_00 = H1_tile[ty  ][tx+1];
    const float H1_p0 = H1_tile[ty  ][tx+2];
    const float H1_mp = H1_tile[ty+1][tx  ];
    const float H1_0p = H1_tile[ty+1][tx+1];
    const float H1_pp = H1_tile[ty+1][tx+2];

    const float H2_m0 = H2_tile[ty  ][tx  ];
    const float H2_00 = H2_tile[ty  ][tx+1];
    const float H2_p0 = H2_tile[ty  ][tx+2];
    const float H2_mp = H2_tile[ty+1][tx  ];
    const float H2_0p = H2_tile[ty+1][tx+1];
    const float H2_pp = H2_tile[ty+1][tx+2];

    //eta^n at the same six cells
    const float eta1_m0 = eta1_tile[ty  ][tx  ];
    const float eta1_00 = eta1_tile[ty  ][tx+1];
    const float eta1_p0 = eta1_tile[ty  ][tx+2];
    const float eta1_mp = eta1_tile[ty+1][tx  ];
    const float eta1_0p = eta1_tile[ty+1][tx+1];
    const float eta1_pp = eta1_tile[ty+1][tx+2];

    const float eta2_m0 = eta2_tile[ty  ][tx  ];
    const float eta2_00 = eta2_tile[ty  ][tx+1];
    const float eta2_p0 = eta2_tile[ty  ][tx+2];
    const float eta2_mp = eta2_tile[ty+1][tx  ];
    const float eta2_0p = eta2_tile[ty+1][tx+1];
    const float eta2_pp = eta2_tile[ty+1][tx+2];

    //Four-cell averages of eta on either side of the V position
    const float eta1_bar_m0 = 0.25f*(eta1_m0 + eta1_mp + eta1_00 + eta1_0p);
    const float eta1_bar_00 = 0.25f*(eta1_00 + eta1_0p + eta1_p0 + eta1_pp);
    const float eta2_bar_m0 = 0.25f*(eta2_m0 + eta2_mp + eta2_00 + eta2_0p);
    const float eta2_bar_00 = 0.25f*(eta2_00 + eta2_0p + eta2_p0 + eta2_pp);

    //Same averages for H, plus H_y at the V position itself
    const float H1_bar_m0 = 0.25f*(H1_m0 + H1_mp + H1_00 + H1_0p);
    const float H1_bar_00 = 0.25f*(H1_00 + H1_0p + H1_p0 + H1_pp);
    const float H1_y = 0.5f*(H1_00 + H1_0p);

    const float H2_bar_m0 = 0.25f*(H2_m0 + H2_mp + H2_00 + H2_0p);
    const float H2_bar_00 = 0.25f*(H2_00 + H2_0p + H2_p0 + H2_pp);
    const float H2_y = 0.5f*(H2_00 + H2_0p);

    //Layer thickness: the top layer sits between eta1 and eta2
    const float h1_0p = H1_0p + eta1_0p - eta2_0p;
    const float h1_00 = H1_00 + eta1_00 - eta2_00;
    const float h1_bar_00 = H1_bar_00 + eta1_bar_00 - eta2_bar_00;
    const float h1_bar_m0 = H1_bar_m0 + eta1_bar_m0 - eta2_bar_m0;

    const float h2_0p = H2_0p + eta2_0p;
    const float h2_00 = H2_00 + eta2_00;
    const float h2_bar_00 = H2_bar_00 + eta2_bar_00;
    const float h2_bar_m0 = H2_bar_m0 + eta2_bar_m0;

    //Pressure terms
    const float h1_y = 0.5f*(h1_0p + h1_00);
    const float h2_y = 0.5f*(h2_0p + h2_00);

    //North-south differences of eta and of eta squared, per layer
    const float deta1 = eta1_0p - eta1_00;
    const float deta2 = eta2_0p - eta2_00;
    const float deta1_sq = eta1_0p*eta1_0p - eta1_00*eta1_00;
    const float deta2_sq = eta2_0p*eta2_0p - eta2_00*eta2_00;

    const float P1_y = -g_*h1_y*deta1 - 0.5f*g_*deta1_sq;

    //Bottom layer: contribution from the top layer, then from the layer itself
    const float P2_from_top = h2_y*deta1 + 0.5f*deta1_sq;
    const float P2_from_bottom = h2_y*deta2 + 0.5f*deta2_sq;
    const float P2_y = -g_ * (rho1_/rho2_) * P2_from_top
                       -g_ * ((rho2_ - rho1_)/rho2_) * P2_from_bottom;

    //U averaged to the V position
    const float U1_bar = 0.25f*(U1_m0 + U1_00 + U1_mp + U1_0p);
    const float U2_bar = 0.25f*(U2_m0 + U2_00 + U2_mp + U2_0p);

    //Friction coefficients
    //FIXME: Should this be h instead of H?
    const float C1 = r1_/H1_y;
    const float C2 = r2_/H2_y;

    //Numerical diffusion / subgrid energy loss coefficient
    const float dx2 = dx_*dx_;
    const float dy2 = dy_*dy_;
    const float D = 2.0f*A_*dt_*(dx2 + dy2)/(dx2*dy_*dy_);

    //Nonlinear terms
    const float dy_over_dx = dy_/dx_;

    const float V1_north = V1_0p + V1_00;
    const float V1_south = V1_00 + V1_0m;
    const float N1_a = V1_north*V1_north / (h1_0p);
    const float N1_b = V1_south*V1_south / (h1_00);
    const float N1_c = (U1_0p + U1_00)*(V1_p0 + V1_00) / (h1_bar_00);
    const float N1_d = (U1_mp + U1_m0)*(V1_00 + V1_m0) / (h1_bar_m0);
    const float N1 = 0.25f*( N1_a - N1_b + dy_over_dx*(N1_c - N1_d) );

    const float V2_north = V2_0p + V2_00;
    const float V2_south = V2_00 + V2_0m;
    const float N2_a = V2_north*V2_north / (h2_0p);
    const float N2_b = V2_south*V2_south / (h2_00);
    const float N2_c = (U2_0p + U2_00)*(V2_p0 + V2_00) / (h2_bar_00);
    const float N2_d = (U2_mp + U2_m0)*(V2_00 + V2_m0) / (h2_bar_m0);
    const float N2 = 0.25f*( N2_a - N2_b + dy_over_dx*(N2_c - N2_d) );

    //Eddy viscosity terms; the centre value is V^n-1
    const float E1 = (V1_p0 - V1_0 + V1_m0)/dx2 + (V1_0p - V1_0 + V1_0m)/dy2;
    const float E2 = (V2_p0 - V2_0 + V2_m0)/dx2 + (V2_0p - V2_0 + V2_0m)/dy2;

    //Wind shear stress; it enters the top-layer update only
    const float Y = windStressY(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho1_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //V at the next timestep
    const float two_dt = 2.0f*dt_;
    const float V1_2 = (V1_0 + two_dt*(-f_*U1_bar + (N1 + P1_y)/dy_ + Y + C1*V2_0 + A_*E1) ) / (1.0f + D);
    const float V2_2 = (V2_0 + two_dt*(-f_*U2_bar + (N2 + P2_y)/dy_ + C1*V1_0 + A_*E2) ) / (1.0f + two_dt*C2 + D);

    //Store the result for the updated cells only
    if (updates_cell) {
        V1_0_row[ti] = V1_2;
        V2_0_row[ti] = V2_2;
    }
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,128 +0,0 @@
|
|||||||
/**
|
|
||||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
|
||||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
|
||||||
described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Tile size of one work-group. The defaults can be overridden at build time
// (e.g. -D block_height=16 -D block_width=16). The kernel in this file
// indexes the tiles below by local id, so the launch's work-group size must
// not exceed these dimensions.
#ifndef block_height
#define block_height 8
#endif
#ifndef block_width
#define block_width 8
#endif

// Local-memory tiles: u_shmem has one extra column and v_shmem one extra row
// compared with the block itself.
typedef __local float u_shmem[block_height][block_width+1];
typedef __local float v_shmem[block_height+1][block_width];
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Two-layer kernel that evolves eta one step in time.
 *
 * For both layers eta^n-1 is read from eta*_0 and overwritten with eta^n+1,
 * computed from differences of U^n and V^n across the cell. The top-layer
 * update sums the differences of both layers; the bottom-layer update uses
 * layer 2 only. All *_pitch_ arguments are row strides in bytes.
 */
__kernel void computeEtaKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Data for layer 1
        __global float* eta1_0_ptr_, int eta1_0_pitch_, //eta_1^n-1 (also used as output, that is eta_1^n+1)
        __global float* U1_1_ptr_, int U1_1_pitch_, // U^n
        __global float* V1_1_ptr_, int V1_1_pitch_, // V^n

        //Data for layer 2
        __global float* eta2_0_ptr_, int eta2_0_pitch_, //eta_2^n-1 (also used as output, that is eta_2^n+1)
        __global float* U2_1_ptr_, int U2_1_pitch_, // U^n
        __global float* V2_1_ptr_, int V2_1_pitch_ // V^n
        ) {

    //Local-memory tiles for U^n and V^n of each layer
    u_shmem U1_tile;
    v_shmem V1_tile;
    u_shmem U2_tile;
    v_shmem V2_tile;

    //Position of this work-item inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Work-group extent, used as stride by the cooperative tile loads
    const int stride_x = get_local_size(0);
    const int stride_y = get_local_size(1);

    //First cell of this block; the +1 skips the global ghost cells
    const int bx = get_local_size(0) * get_group_id(0) + 1;
    const int by = get_local_size(1) * get_group_id(1) + 1;

    //Cell handled by this work-item
    const int ti = bx + tx;
    const int tj = by + ty;

    //Only these cells are read from and written back to the eta arrays
    const bool updates_cell = (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1);

    //Rows of the eta arrays that this work-item touches
    __global float* eta1_0_row = (__global float*) ((__global char*) eta1_0_ptr_ + eta1_0_pitch_*tj);
    __global float* eta2_0_row = (__global float*) ((__global char*) eta2_0_ptr_ + eta2_0_pitch_*tj);

    //eta^n-1 for this cell; work-items outside the updated range use zero
    const float eta1_0 = updates_cell ? eta1_0_row[ti] : 0.0f;
    const float eta2_0 = updates_cell ? eta2_0_row[ti] : 0.0f;

    //Load U: block_height rows of block_width+1 values
    for (int j=ty; j<block_height; j+=stride_y) {
        const int row = clamp(by + j, 1, ny_); // fake ghost cells

        __global float* const U1_1_row = (__global float*) ((__global char*) U1_1_ptr_ + U1_1_pitch_*row);
        __global float* const U2_1_row = (__global float*) ((__global char*) U2_1_ptr_ + U2_1_pitch_*row);

        for (int i=tx; i<block_width+1; i+=stride_x) {
            const int col = clamp(bx + i - 1, 0, nx_); // prevent out of bounds

            U1_tile[j][i] = U1_1_row[col];
            U2_tile[j][i] = U2_1_row[col];
        }
    }

    //Load V: block_height+1 rows of block_width values
    for (int j=ty; j<block_height+1; j+=stride_y) {
        const int row = clamp(by + j - 1, 0, ny_); // prevent out of bounds

        __global float* const V1_1_row = (__global float*) ((__global char*) V1_1_ptr_ + V1_1_pitch_*row);
        __global float* const V2_1_row = (__global float*) ((__global char*) V2_1_ptr_ + V2_1_pitch_*row);

        for (int i=tx; i<block_width; i+=stride_x) {
            const int col = clamp(bx + i, 1, nx_); // fake ghost cells

            V1_tile[j][i] = V1_1_row[col];
            V2_tile[j][i] = V2_1_row[col];
        }
    }

    //All tiles must be complete before any work-item reads its stencil
    barrier(CLK_LOCAL_MEM_FENCE);

    //U at the two x-neighbours and V at the two y-neighbours of the cell
    const float U1_lo = U1_tile[ty][tx];
    const float U1_hi = U1_tile[ty][tx+1];
    const float U2_lo = U2_tile[ty][tx];
    const float U2_hi = U2_tile[ty][tx+1];

    const float V1_lo = V1_tile[ty][tx];
    const float V1_hi = V1_tile[ty+1][tx];
    const float V2_lo = V2_tile[ty][tx];
    const float V2_hi = V2_tile[ty+1][tx];

    //Leapfrog factors: two time steps over the grid spacing
    const float x_factor = 2.0f*dt_/dx_;
    const float y_factor = 2.0f*dt_/dy_;

    //eta at the next timestep
    const float eta1_2 = eta1_0 - x_factor * (U1_hi - U1_lo + U2_hi - U2_lo)
                                - y_factor * (V1_hi - V1_lo + V2_hi - V2_lo);
    const float eta2_2 = eta2_0 - x_factor * (U2_hi - U2_lo)
                                - y_factor * (V2_hi - V2_lo);

    //Store the result for the updated cells only
    if (updates_cell) {
        eta1_0_row[ti] = eta1_2;
        eta2_0_row[ti] = eta2_2;
    }
}
|
|
@ -1,218 +0,0 @@
|
|||||||
/**
|
|
||||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
|
||||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
|
||||||
described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "common.opencl"
|
|
||||||
|
|
||||||
|
|
||||||
/**
 * Kernel that evolves U one step in time.
 *
 * U^n-1 is read from U0 and overwritten with U^n+1, using H, eta^n, U^n and
 * V^n as input. Each work-group first copies the cells it needs (block plus
 * halo) into local memory, with indices clamped so that no read leaves the
 * arrays, and then every work-item updates one cell. All *_pitch_ arguments
 * are row strides in bytes.
 *
 * All arithmetic is single precision: every floating-point literal carries
 * the f suffix so that no term is silently promoted to double.
 */
__kernel void computeUKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Numerical diffusion
        float A_,

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* eta1_ptr_, int eta1_pitch_, // eta^n
        __global float* U0_ptr_, int U0_pitch_, // U^n-1, also output, U^n+1
        __global float* U1_ptr_, int U1_pitch_, // U^n
        __global float* V1_ptr_, int V1_pitch_, // V^n

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    //Local-memory tiles (block plus the halo each stencil needs)
    __local float H_shared[block_height+2][block_width+1];
    __local float eta1_shared[block_height+2][block_width+1];
    __local float U1_shared[block_height+2][block_width+2];
    __local float V1_shared[block_height+1][block_width+1];

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Start of block within domain
    const int bx = get_local_size(0) * get_group_id(0) + 1; //Skip global ghost cells
    const int by = get_local_size(1) * get_group_id(1) + 1; //Skip global ghost cells

    //Index of cell within domain
    const int ti = bx + tx;
    const int tj = by + ty;

    //Compute pointer to current row in the U array
    __global float* const U0_row = (__global float*) ((__global char*) U0_ptr_ + U0_pitch_*tj);

    //Read current U (U^n-1); work-items outside the updated range keep zero
    float U0 = 0.0f;
    if (ti > 0 && ti < nx_ && tj > 0 && tj < ny_+1) {
        U0 = U0_row[ti];
    }

    //Read H and eta into shared memory: (nx+1)*(ny+2) cells
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // "fake" global ghost cells by clamping
        const int l = clamp(by + j - 1, 1, ny_);

        //Compute the pointer to current row in the H and eta arrays
        __global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
        __global float* const eta1_row = (__global float*) ((__global char*) eta1_ptr_ + eta1_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // "fake" global ghost cells by clamping
            const int k = clamp(bx + i, 1, nx_);

            H_shared[j][i] = H_row[k];
            eta1_shared[j][i] = eta1_row[k];
        }
    }

    //Read U into shared memory: (nx+2)*(ny+2) cells
    for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
        // "fake" ghost cells by clamping
        const int l = clamp(by + j - 1, 1, ny_);

        //Compute the pointer to current row in the U array
        __global float* const U1_row = (__global float*) ((__global char*) U1_ptr_ + U1_pitch_*l);

        for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
            // Prevent out-of-bounds
            const int k = clamp(bx + i - 1, 0, nx_);

            U1_shared[j][i] = U1_row[k];
        }
    }

    //Read V into shared memory: (nx+1)*(ny+1) cells
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        // Prevent out-of-bounds
        const int l = clamp(by + j - 1, 0, ny_);

        //Compute the pointer to current row in the V array
        __global float* const V1_row = (__global float*) ((__global char*) V1_ptr_ + V1_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            // "fake" ghost cells by clamping
            const int k = clamp(bx + i, 1, nx_);

            V1_shared[j][i] = V1_row[k];
        }
    }

    //Make sure all threads have read into shared mem
    barrier(CLK_LOCAL_MEM_FENCE);

    /**
      * Now get all our required variables as short-hands
      * here we use the notation of
      *  Var_00 as var_i,j
      *  Var_p0 as var_i+1,j
      *  Var_0m as var_i,j-1
      *  etc
      */
    const float U_00 = U1_shared[ty+1][tx+1]; //U at "center"
    const float U_0p = U1_shared[ty+2][tx+1]; //U at "north"
    const float U_0m = U1_shared[ty  ][tx+1]; //U at "south"
    const float U_p0 = U1_shared[ty+1][tx+2]; //U at "east"
    const float U_m0 = U1_shared[ty+1][tx  ]; //U at "west"

    const float V_00 = V1_shared[ty+1][tx  ];
    const float V_p0 = V1_shared[ty+1][tx+1];
    const float V_0m = V1_shared[ty  ][tx  ];
    const float V_pm = V1_shared[ty  ][tx+1];

    const float H_0m = H_shared[ty  ][tx  ];
    const float H_00 = H_shared[ty+1][tx  ];
    const float H_0p = H_shared[ty+2][tx  ];
    const float H_pm = H_shared[ty  ][tx+1];
    const float H_p0 = H_shared[ty+1][tx+1];
    const float H_pp = H_shared[ty+2][tx+1];

    const float eta_0m = eta1_shared[ty  ][tx  ];
    const float eta_00 = eta1_shared[ty+1][tx  ];
    const float eta_0p = eta1_shared[ty+2][tx  ];
    const float eta_pm = eta1_shared[ty  ][tx+1];
    const float eta_p0 = eta1_shared[ty+1][tx+1];
    const float eta_pp = eta1_shared[ty+2][tx+1];

    //Reconstruct H_bar (four-cell averages south and north of U) and H_x (at the U position)
    const float H_bar_0m = 0.25f*(H_0m + H_pm + H_00 + H_p0);
    const float H_bar_00 = 0.25f*(H_00 + H_p0 + H_0p + H_pp);
    const float H_x = 0.5f*(H_00 + H_p0);

    //Reconstruct Eta_bar with the same four-cell averages as H_bar
    const float eta_bar_0m = 0.25f*(eta_0m + eta_pm + eta_00 + eta_p0);
    const float eta_bar_00 = 0.25f*(eta_00 + eta_p0 + eta_0p + eta_pp);

    //Reconstruct V at the U position
    const float V_bar = 0.25f*(V_0m + V_00 + V_pm + V_p0);

    //Calculate the friction coefficient (divisor of the implicit update)
    const float C = 1.0f + 2.0f*r_*dt_/H_x + 2.0f*A_*dt_*(dx_*dx_ + dy_*dy_)/(dx_*dx_*dy_*dy_);

    //Calculate the pressure/gravitational effect
    const float h_p0 = H_p0 + eta_p0;
    const float h_00 = H_00 + eta_00;
    const float h_x = 0.5f*(h_00 + h_p0); //Could possibly use h for pressure terms instead of H
    const float P_x_hat = -0.5f*g_*(eta_p0*eta_p0 - eta_00*eta_00);
    const float P_x = -g_*h_x*(eta_p0 - eta_00) + P_x_hat;

    //Calculate nonlinear effects
    const float N_a = (U_p0 + U_00)*(U_p0 + U_00) / (H_p0 + eta_p0);
    const float N_b = (U_00 + U_m0)*(U_00 + U_m0) / (H_00 + eta_00);
    const float N_c = (U_0p + U_00)*(V_p0 + V_00) / (H_bar_00 + eta_bar_00);
    const float N_d = (U_00 + U_0m)*(V_pm + V_0m) / (H_bar_0m + eta_bar_0m);
    float N = 0.25f*( N_a - N_b + (dx_/dy_)*(N_c - N_d) );

    //Calculate eddy viscosity term; the centre value is U^n-1
    float E = (U_p0 - U0 + U_m0)/(dx_*dx_) + (U_0p - U0 + U_0m)/(dy_*dy_);

    //Calculate the wind shear stress
    float X = windStressX(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute the U at the next timestep
    float U2 = (U0 + 2.0f*dt_*(f_*V_bar + (N + P_x)/dx_ + X + A_*E) ) / C;

    //Write to main memory for internal cells
    if (ti > 0 && ti < nx_ && tj > 0 && tj < ny_+1) {
        U0_row[ti] = U2;
    }
}
|
|
@ -1,222 +0,0 @@
|
|||||||
/**
|
|
||||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
|
||||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
|
||||||
described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5.
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Kernel that evolves V one step in time.
|
|
||||||
*/
|
|
||||||
__kernel void computeVKernel(
|
|
||||||
//Discretization parameters
|
|
||||||
int nx_, int ny_,
|
|
||||||
float dx_, float dy_, float dt_,
|
|
||||||
|
|
||||||
//Physical parameters
|
|
||||||
float g_, //< Gravitational constant
|
|
||||||
float f_, //< Coriolis coefficient
|
|
||||||
float r_, //< Bottom friction coefficient
|
|
||||||
|
|
||||||
//Numerical diffusion
|
|
||||||
float A_,
|
|
||||||
|
|
||||||
//Data
|
|
||||||
__global float* H_ptr_, int H_pitch_,
|
|
||||||
__global float* eta1_ptr_, int eta1_pitch_, // eta^n
|
|
||||||
__global float* U1_ptr_, int U1_pitch_, // U^n
|
|
||||||
__global float* V0_ptr_, int V0_pitch_, // V^n-1, also output V^n+1
|
|
||||||
__global float* V1_ptr_, int V1_pitch_, // V^n
|
|
||||||
|
|
||||||
// Wind stress parameters
|
|
||||||
int wind_stress_type_,
|
|
||||||
float tau0_, float rho_, float alpha_, float xm_, float Rc_,
|
|
||||||
float x0_, float y0_,
|
|
||||||
float u0_, float v0_,
|
|
||||||
float t_) {
|
|
||||||
|
|
||||||
__local float H_shared[block_height+1][block_width+2];
|
|
||||||
__local float eta1_shared[block_height+1][block_width+2];
|
|
||||||
__local float U1_shared[block_height+1][block_width+1];
|
|
||||||
__local float V1_shared[block_height+2][block_width+2];
|
|
||||||
|
|
||||||
//Index of thread within block
|
|
||||||
const int tx = get_local_id(0);
|
|
||||||
const int ty = get_local_id(1);
|
|
||||||
|
|
||||||
//Start of block within domain
|
|
||||||
const int bx = get_local_size(0) * get_group_id(0) + 1; //Skip global ghost cells
|
|
||||||
const int by = get_local_size(1) * get_group_id(1) + 1; //Skip global ghost cells
|
|
||||||
|
|
||||||
//Index of cell within domain
|
|
||||||
const int ti = bx + tx;
|
|
||||||
const int tj = by + ty;
|
|
||||||
|
|
||||||
//Compute pointer to current row in the V array
|
|
||||||
__global float* const V0_row = (__global float*) ((__global char*) V0_ptr_ + V0_pitch_*tj);
|
|
||||||
|
|
||||||
//Read current V
|
|
||||||
float V0 = 0.0f;
|
|
||||||
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_) {
|
|
||||||
V0 = V0_row[ti];
|
|
||||||
}
|
|
||||||
|
|
||||||
//Read H and eta into shared memory: (nx+2)*(ny+1) cells
|
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
|
||||||
// "fake" global ghost cells by clamping
|
|
||||||
const int l = clamp(by + j, 1, ny_);
|
|
||||||
|
|
||||||
//Compute the pointer to current row in the H and eta arrays
|
|
||||||
__global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
|
|
||||||
__global float* const eta1_row = (__global float*) ((__global char*) eta1_ptr_ + eta1_pitch_*l);
|
|
||||||
|
|
||||||
for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
|
|
||||||
// "fake" global ghost cells by clamping
|
|
||||||
const int k = clamp(bx + i - 1, 1, nx_);
|
|
||||||
|
|
||||||
H_shared[j][i] = H_row[k];
|
|
||||||
eta1_shared[j][i] = eta1_row[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//Read U into shared memory: (nx+1)*(ny+1) cells
|
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
|
||||||
// "fake" ghost cells by clamping
|
|
||||||
const int l = clamp(by + j, 1, ny_);
|
|
||||||
|
|
||||||
//Compute the pointer to current row in the U array
|
|
||||||
__global float* const U1_row = (__global float*) ((__global char*) U1_ptr_ + U1_pitch_*l);
|
|
||||||
|
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
|
||||||
// Prevent out-of-bounds
|
|
||||||
const int k = clamp(bx + i - 1, 0, nx_);
|
|
||||||
|
|
||||||
U1_shared[j][i] = U1_row[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//Read V into shared memory: (nx+2)*(ny+2) cells
|
|
||||||
for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
|
|
||||||
// Prevent out-of-bounds
|
|
||||||
const int l = clamp(by + j - 1, 0, ny_);
|
|
||||||
|
|
||||||
//Compute the pointer to current row in the U array
|
|
||||||
__global float* const V1_row = (__global float*) ((__global char*) V1_ptr_ + V1_pitch_*l);
|
|
||||||
|
|
||||||
for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
|
|
||||||
// "fake" ghost cells by clamping
|
|
||||||
const int k = clamp(bx + i - 1, 1, nx_);
|
|
||||||
|
|
||||||
V1_shared[j][i] = V1_row[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//Make sure all threads have read into shared mem
|
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Now get all our required variables as short-hands
|
|
||||||
* here we use the notation of
|
|
||||||
* Var_00 as var_i,j
|
|
||||||
* Var_p0 as var_i+1,j
|
|
||||||
* Var_0m as var_i,j-1
|
|
||||||
* etc
|
|
||||||
*/
|
|
||||||
const float V_00 = V1_shared[ty+1][tx+1]; //V at "center"
|
|
||||||
const float V_0p = V1_shared[ty+2][tx+1]; //V at "north"
|
|
||||||
const float V_0m = V1_shared[ty ][tx+1]; //V at "south"
|
|
||||||
const float V_p0 = V1_shared[ty+1][tx+2]; //V at "east"
|
|
||||||
const float V_m0 = V1_shared[ty+1][tx ]; //V at "west"
|
|
||||||
|
|
||||||
const float U_00 = U1_shared[ty ][tx+1];
|
|
||||||
const float U_0p = U1_shared[ty+1][tx+1];
|
|
||||||
const float U_m0 = U1_shared[ty ][tx ];
|
|
||||||
const float U_mp = U1_shared[ty+1][tx ];
|
|
||||||
|
|
||||||
const float H_m0 = H_shared[ty ][tx ];
|
|
||||||
const float H_00 = H_shared[ty ][tx+1];
|
|
||||||
const float H_p0 = H_shared[ty ][tx+2];
|
|
||||||
const float H_mp = H_shared[ty+1][tx ];
|
|
||||||
const float H_0p = H_shared[ty+1][tx+1];
|
|
||||||
const float H_pp = H_shared[ty+1][tx+2];
|
|
||||||
|
|
||||||
const float eta_m0 = eta1_shared[ty ][tx ];
|
|
||||||
const float eta_00 = eta1_shared[ty ][tx+1];
|
|
||||||
const float eta_p0 = eta1_shared[ty ][tx+2];
|
|
||||||
const float eta_mp = eta1_shared[ty+1][tx ];
|
|
||||||
const float eta_0p = eta1_shared[ty+1][tx+1];
|
|
||||||
const float eta_pp = eta1_shared[ty+1][tx+2];
|
|
||||||
|
|
||||||
|
|
||||||
//Reconstruct H_bar and H_y (at the V position)
|
|
||||||
const float H_bar_m0 = 0.25f*(H_m0 + H_mp + H_00 + H_0p);
|
|
||||||
const float H_bar_00 = 0.25f*(H_00 + H_0p + H_p0 + H_pp);
|
|
||||||
const float H_y = 0.5f*(H_00 + H_0p);
|
|
||||||
|
|
||||||
//Reconstruct Eta_bar at the V position
|
|
||||||
const float eta_bar_m0 = 0.25f*(eta_m0 + eta_mp + eta_00 + eta_0p);
|
|
||||||
const float eta_bar_00 = 0.25f*(eta_00 + eta_0p + eta_p0 + eta_pp);
|
|
||||||
|
|
||||||
//Reconstruct U at the V position
|
|
||||||
const float U_bar = 0.25f*(U_m0 + U_00 + U_mp + U_0p);
|
|
||||||
|
|
||||||
//Calculate the friction coefficient
|
|
||||||
const float C = 1.0 + 2*r_*dt_/H_y + 2*A_*dt_*(dx_*dx_ + dy_*dy_)/(dx_*dx_*dy_*dy_);
|
|
||||||
|
|
||||||
//Calculate the pressure/gravitational effect
|
|
||||||
const float h_0p = H_0p + eta_0p;
|
|
||||||
const float h_00 = H_00 + eta_00;
|
|
||||||
const float h_y = 0.5*(h_00 + h_0p); //Could possibly use h for pressure terms instead of H
|
|
||||||
const float P_y_hat = -0.5f*g_*(eta_0p*eta_0p - eta_00*eta_00);
|
|
||||||
const float P_y = -g_*h_y*(eta_0p - eta_00) + P_y_hat;
|
|
||||||
|
|
||||||
//Calculate nonlinear effects
|
|
||||||
const float N_a = (V_0p + V_00)*(V_0p + V_00) / (H_0p + eta_0p);
|
|
||||||
const float N_b = (V_00 + V_0m)*(V_00 + V_0m) / (H_00 + eta_00);
|
|
||||||
const float N_c = (U_0p + U_00)*(V_p0 + V_00) / (H_bar_00 + eta_bar_00);
|
|
||||||
const float N_d = (U_mp + U_m0)*(V_00 + V_m0) / (H_bar_m0 + eta_bar_m0);
|
|
||||||
float N = 0.25f*( N_a - N_b + (dy_/dx_)*(N_c - N_d) );
|
|
||||||
|
|
||||||
//Calculate eddy viscosity term
|
|
||||||
float E = (V_p0 - V0 + V_m0)/(dx_*dx_) + (V_0p - V0 + V_0m)/(dy_*dy_);
|
|
||||||
|
|
||||||
//Calculate the wind shear stress
|
|
||||||
float Y = windStressY(
|
|
||||||
wind_stress_type_,
|
|
||||||
dx_, dy_, dt_,
|
|
||||||
tau0_, rho_, alpha_, xm_, Rc_,
|
|
||||||
x0_, y0_,
|
|
||||||
u0_, v0_,
|
|
||||||
t_);
|
|
||||||
|
|
||||||
//Compute the V at the next timestep
|
|
||||||
float V2 = (V0 + 2.0f*dt_*(-f_*U_bar + (N + P_y)/dy_ + Y + A_*E) ) / C;
|
|
||||||
|
|
||||||
//Write to main memory for internal cells
|
|
||||||
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_) {
|
|
||||||
V0_row[ti] = V2;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,109 +0,0 @@
|
|||||||
/**
|
|
||||||
This OpenCL kernel implements part of the Centered in Time, Centered
|
|
||||||
in Space (leapfrog) numerical scheme for the shallow water equations,
|
|
||||||
described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5.
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Kernel that evolves eta one step in time.
|
|
||||||
*/
|
|
||||||
__kernel void computeEtaKernel(
|
|
||||||
//Discretization parameters
|
|
||||||
int nx_, int ny_,
|
|
||||||
float dx_, float dy_, float dt_,
|
|
||||||
|
|
||||||
//Physical parameters
|
|
||||||
float g_, //< Gravitational constant
|
|
||||||
float f_, //< Coriolis coefficient
|
|
||||||
float r_, //< Bottom friction coefficient
|
|
||||||
|
|
||||||
//Data
|
|
||||||
__global float* eta0_ptr_, int eta0_pitch_, //eta^n-1 (also used as output, that is eta^n+1)
|
|
||||||
__global float* U1_ptr_, int U1_pitch_, // U^n
|
|
||||||
__global float* V1_ptr_, int V1_pitch_ // V^n
|
|
||||||
) {
|
|
||||||
|
|
||||||
//Index of thread within block
|
|
||||||
const int tx = get_local_id(0);
|
|
||||||
const int ty = get_local_id(1);
|
|
||||||
|
|
||||||
//Start of block within domain
|
|
||||||
const int bx = get_local_size(0) * get_group_id(0) + 1; //Skip global ghost cells
|
|
||||||
const int by = get_local_size(1) * get_group_id(1) + 1; //Skip global ghost cells
|
|
||||||
|
|
||||||
//Index of cell within domain
|
|
||||||
const int ti = bx + tx;
|
|
||||||
const int tj = by + ty;
|
|
||||||
|
|
||||||
__local float U1_shared[block_height][block_width+1];
|
|
||||||
__local float V1_shared[block_height+1][block_width];
|
|
||||||
|
|
||||||
//Compute pointer to current row in the U array
|
|
||||||
__global float* eta0_row = (__global float*) ((__global char*) eta0_ptr_ + eta0_pitch_*tj);
|
|
||||||
|
|
||||||
//Read current eta
|
|
||||||
float eta0 = 0.0f;
|
|
||||||
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1) {
|
|
||||||
eta0 = eta0_row[ti];
|
|
||||||
}
|
|
||||||
|
|
||||||
//Read U into shared memory
|
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
|
||||||
const int l = clamp(by + j, 1, ny_); // fake ghost cells
|
|
||||||
|
|
||||||
//Compute the pointer to current row in the V array
|
|
||||||
__global float* const U1_row = (__global float*) ((__global char*) U1_ptr_ + U1_pitch_*l);
|
|
||||||
|
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
|
||||||
const int k = clamp(bx + i - 1, 0, nx_); // prevent out of bounds
|
|
||||||
|
|
||||||
U1_shared[j][i] = U1_row[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//Read V into shared memory
|
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
|
||||||
const int l = clamp(by + j - 1, 0, ny_); // prevent out of bounds
|
|
||||||
|
|
||||||
//Compute the pointer to current row in the V array
|
|
||||||
__global float* const V1_row = (__global float*) ((__global char*) V1_ptr_ + V1_pitch_*l);
|
|
||||||
|
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
|
||||||
const int k = clamp(bx + i, 1, nx_); // fake ghost cells
|
|
||||||
|
|
||||||
V1_shared[j][i] = V1_row[k];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//Make sure all threads have read into shared mem
|
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
|
|
||||||
//Compute the H at the next timestep
|
|
||||||
float eta2 = eta0 - 2.0f*dt_/dx_ * (U1_shared[ty][tx+1] - U1_shared[ty][tx])
|
|
||||||
- 2.0f*dt_/dy_ * (V1_shared[ty+1][tx] - V1_shared[ty][tx]);
|
|
||||||
|
|
||||||
//Write to main memory
|
|
||||||
if (ti > 0 && ti < nx_+1 && tj > 0 && tj < ny_+1) {
|
|
||||||
eta0_row[ti] = eta2;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,79 +1,159 @@
|
|||||||
import pyopencl
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Static function which reads a text file and creates an OpenCL kernel from that
|
Class which keeps track of the CUDA context and some helper functions
|
||||||
"""
|
"""
|
||||||
def get_kernel(cl_ctx, kernel_filename, block_width, block_height):
|
class CudaContext(object):
|
||||||
import datetime
|
def __init__(self, verbose=True, blocking=False):
    """Initialise the CUDA driver, pick device 0 and create a context on it.

    verbose:  when true, print driver, device and context information.
    blocking: when true, the context is created with
              SCHED_BLOCKING_SYNC instead of SCHED_AUTO.
    """
    self.verbose = verbose
    self.blocking = blocking
    self.kernels = {}  # compiled-module cache used by get_kernel

    cuda.init(flags=0)

    if self.verbose:
        print("CUDA version " + str(cuda.get_version()))
        print("Driver version " + str(cuda.get_driver_version()))

    self.cuda_device = cuda.Device(0)
    if self.verbose:
        print("Using " + self.cuda_device.name())
        print(" => compute capability: " + str(self.cuda_device.compute_capability()))
        print(" => memory: " + str(self.cuda_device.total_memory() / (1024*1024)) + " MB")

    # Choose the scheduling policy first, then create the context once
    if self.blocking:
        scheduling = cuda.ctx_flags.SCHED_BLOCKING_SYNC
    else:
        scheduling = cuda.ctx_flags.SCHED_AUTO
    self.cuda_context = self.cuda_device.make_context(flags=scheduling)

    if self.blocking and self.verbose:
        for line in ("=== WARNING ===", "Using blocking context", "=== WARNING ==="):
            print(line)

    if self.verbose:
        print("Created context <" + str(self.cuda_context.handle) + ">")
|
||||||
|
|
||||||
|
|
||||||
|
def __del__(self, *args):
    """Remove this object's CUDA context from the context stack and detach it.

    Every context on the current stack is popped; contexts that are not
    ours are remembered and pushed back afterwards in their original
    order, and finally our own context is detached.

    The pop is performed regardless of ``verbose``. Previously it only
    happened inside the verbose branch, so a non-verbose instance looped
    forever here.
    """
    own_context = getattr(self, "cuda_context", None)
    if own_context is None:
        # __init__ failed before the context was created: nothing to clean
        return
    verbose = getattr(self, "verbose", False)
    prefix = " `-> <" + str(own_context.handle) + "> "

    if verbose:
        print("Cleaning up CUDA context <" + str(own_context.handle) + ">")

    # Loop over all contexts in stack, and remove "this"
    other_contexts = []
    while cuda.Context.get_current() is not None:
        context = cuda.Context.get_current()
        if context.handle != own_context.handle:
            if verbose:
                print(prefix + "Popping context <" + str(context.handle) + "> which we do not own")
            # Prepend so that pushing back restores the original order
            other_contexts = [context] + other_contexts
        elif verbose:
            print(prefix + "Popping context <" + str(context.handle) + "> (ourselves)")
        cuda.Context.pop()

    # Add all the contexts we popped that were not our own
    for context in other_contexts:
        if verbose:
            print(prefix + "Pushing <" + str(context.handle) + ">")
        cuda.Context.push(context)

    if verbose:
        print(prefix + "Detaching context")
    own_context.detach()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Reads a text file and creates a CUDA kernel module from it
|
||||||
|
"""
|
||||||
|
def get_kernel(self, kernel_filename, block_width, block_height):
    """Return the compiled module for kernel_filename, compiling on demand.

    kernel_filename is resolved relative to the directory of this module.
    The cache key is built from the block dimensions plus, for the kernel
    file and every file it (transitively) ``#include``s, the path, a hash
    of the contents and the modification time. A module is therefore
    recompiled when any of those files changes or when different block
    dimensions are requested (they are compiled in through ``#define``).

    Returns whatever ``cuda_compiler.SourceModule`` returned for that key.
    """
    module_path = os.path.dirname(os.path.realpath(__file__))

    # The block size is part of the generated source, so it must be part
    # of the key; otherwise a module built for another size is reused.
    kernel_hash = "block_" + str(block_width) + "x" + str(block_height) + "--"

    # Loop over file and includes, and check if something has changed
    files = [kernel_filename]
    visited = set()  # guards against circular / repeated includes
    while files:
        filename = os.path.join(module_path, files.pop())
        if filename in visited:
            continue
        visited.add(filename)

        modified = os.path.getmtime(filename)
        with open(filename, "r") as file:
            file_str = file.read()

        kernel_hash += filename + "_" + str(hash(file_str)) + ":" + str(modified) + "--"
        # Raw string: "\W" is not a valid escape in a normal string literal
        files += re.findall(r'^\W*#include\W+(.+?)\W*$', file_str, re.M)

    # Recompile kernel if file or includes have changed
    if kernel_hash not in self.kernels:
        #Create define string
        define_string = "#define block_width " + str(block_width) + "\n"
        define_string += "#define block_height " + str(block_height) + "\n\n"

        kernel_string = define_string + '#include "' + os.path.join(module_path, kernel_filename) + '"'
        self.kernels[kernel_hash] = cuda_compiler.SourceModule(kernel_string, include_dirs=[module_path])

    return self.kernels[kernel_hash]
|
||||||
assert(cl_ctx.num_devices == 1)
|
|
||||||
platform_name = cl_ctx.devices[0].get_info(pyopencl.device_info.PLATFORM).name
|
|
||||||
platform_name = platform_name.upper()
|
|
||||||
if ('INTEL' in platform_name):
|
|
||||||
#Intel CL compiler doesn't like spaces in include paths. We have to escape them
|
|
||||||
return '"' + s.replace(" ", "\\ ") + '"'
|
|
||||||
elif ('NVIDIA' in platform_name):
|
|
||||||
#NVIDIA doesn't like double quoted paths...
|
|
||||||
#return "'" + s + "'"
|
|
||||||
return s
|
|
||||||
|
|
||||||
module_path = os.path.dirname(os.path.realpath(__file__))
|
|
||||||
module_path_escaped = shellquote(module_path)
|
|
||||||
options = ['-I', module_path_escaped]
|
|
||||||
|
|
||||||
#Read the proper program
|
|
||||||
fullpath = os.path.join(module_path, kernel_filename)
|
|
||||||
with open(fullpath, "r") as kernel_file:
|
|
||||||
kernel_string = define_string + kernel_file.read()
|
|
||||||
kernel = pyopencl.Program(cl_ctx, kernel_string).build(options)
|
|
||||||
|
|
||||||
return kernel
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Clears the kernel cache (useful for debugging & development)
|
||||||
|
"""
|
||||||
|
def clear_kernel_cache(self):
    """Forget all cached kernel modules by rebinding the cache to a new, empty dict."""
    self.kernels = dict()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class Timer(object):
    """Context manager that measures the time spent inside a ``with`` block.

    After the block has finished, ``secs`` and ``msecs`` hold the elapsed
    time. When ``verbose`` is true the elapsed time is also printed,
    prefixed by ``tag``.

    Timing uses ``time.perf_counter()``, a monotonic high-resolution
    clock, so ``start`` and ``end`` are only meaningful relative to each
    other (they are not wall-clock timestamps).
    """

    def __init__(self, tag, verbose=True):
        self.verbose = verbose
        self.tag = tag

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *args):
        self.end = time.perf_counter()
        self.secs = self.end - self.start
        self.msecs = self.secs * 1000 # millisecs
        if self.verbose:
            print("=> " + self.tag + ' %f ms' % self.msecs)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Class that holds data
|
Class that holds data
|
||||||
"""
|
"""
|
||||||
class OpenCLArray2D:
|
class CUDAArray2D:
|
||||||
"""
|
"""
|
||||||
Uploads initial data to the CL device
|
Uploads initial data to the CUDA device
|
||||||
"""
|
"""
|
||||||
def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, data):
|
def __init__(self, stream, nx, ny, halo_x, halo_y, data):
    """Validate ``data`` and upload it to the device on ``stream``.

    ``data`` must be a C-ordered float32 array of shape
    (ny + 2*halo_y, nx + 2*halo_x); this is checked with assertions.
    The upload is issued with ``pycuda.gpuarray.to_gpu_async``.
    """
    self.nx = nx
    self.ny = ny
    self.nx_halo = nx + 2*halo_x
    self.ny_halo = ny + 2*halo_y
    expected_shape = (self.ny_halo, self.nx_halo)

    #Make sure data is in proper format
    assert np.issubdtype(data.dtype, np.float32), "Wrong datatype: %s" % str(data.dtype)
    assert not np.isfortran(data), "Wrong datatype (Fortran, expected C)"
    assert data.shape == expected_shape, "Wrong data shape: %s" % str(data.shape)

    #Upload data to the device
    self.data = pycuda.gpuarray.to_gpu_async(data, stream=stream)

    # Row stride in bytes, stored as an int32
    self.bytes_per_float = data.itemsize
    assert(self.bytes_per_float == 4)
    self.pitch = np.int32(self.nx_halo * self.bytes_per_float)
|
||||||
|
|
||||||
@ -81,27 +161,15 @@ class OpenCLArray2D:
|
|||||||
"""
|
"""
|
||||||
Enables downloading data from CL device to Python
|
Enables downloading data from the CUDA device to Python
|
||||||
"""
|
"""
|
||||||
def download(self, cl_queue):
|
def download(self, stream, asynch=False):
    """Copy the device array to the host and return the host array.

    stream: stream passed on to the copy.
    asynch: when true the copy is issued with ``get_async`` and the
            returned array may not be filled in yet; otherwise ``get``
            is used.

    The flag used to be called ``async``, which is a reserved word since
    Python 3.7 and made this module a SyntaxError. Pass it positionally
    or as ``asynch=``.
    """
    #Copy data from device to host
    if asynch:
        return self.data.get_async(stream=stream)
    # pagelocked=True is deliberately not used: it causes a crash on windows at least
    return self.data.get(stream=stream)
|
||||||
|
|
||||||
"""
|
|
||||||
Converts to C-style float 32 array suitable for the GPU/OpenCL
|
|
||||||
"""
|
|
||||||
@staticmethod
|
|
||||||
def convert_to_float32(data):
|
|
||||||
if (not np.issubdtype(data.dtype, np.float32) or np.isfortran(data)):
|
|
||||||
#print("Converting H0")
|
|
||||||
return data.astype(np.float32, order='C')
|
|
||||||
else:
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -111,20 +179,20 @@ class OpenCLArray2D:
|
|||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
A class representing an Akrawa A type (unstaggered, logically Cartesian) grid
|
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
||||||
"""
|
"""
|
||||||
class SWEDataArkawaA:
|
class SWEDataArakawaA:
|
||||||
"""
|
"""
|
||||||
Uploads initial data to the CL device
|
Uploads initial data to the CL device
|
||||||
"""
|
"""
|
||||||
def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
|
def __init__(self, stream, nx, ny, halo_x, halo_y, h0, hu0, hv0):
    """Upload h0, hu0 and hv0 twice, as the "0" and the "1" set of arrays.

    Both sets start from the same host data; every array is a
    CUDAArray2D created with the same stream, size and halo.
    """
    def upload(field):
        # One device array for the given host field
        return CUDAArray2D(stream, nx, ny, halo_x, halo_y, field)

    initial = (h0, hu0, hv0)
    self.h0, self.hu0, self.hv0 = [upload(field) for field in initial]
    self.h1, self.hu1, self.hv1 = [upload(field) for field in initial]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Swaps the variables after a timestep has been completed
|
Swaps the variables after a timestep has been completed
|
||||||
@ -137,153 +205,11 @@ class SWEDataArkawaA:
|
|||||||
"""
|
"""
|
||||||
Enables downloading data from CL device to Python
|
Enables downloading data from CL device to Python
|
||||||
"""
|
"""
|
||||||
def download(self, cl_queue):
|
def download(self, stream):
    """Download h0, hu0 and hv0 and return them as (h_cpu, hu_cpu, hv_cpu).

    The first two downloads are requested asynchronously and the last
    one synchronously, all on ``stream``.
    NOTE(review): presumably the final synchronous copy on the same
    stream is what guarantees the first two have completed - confirm.

    The flag is passed positionally because the keyword used to be
    spelled ``async``, which is a reserved word since Python 3.7.
    """
    h_cpu = self.h0.download(stream, True)
    hu_cpu = self.hu0.download(stream, True)
    hv_cpu = self.hv0.download(stream, False)

    return h_cpu, hu_cpu, hv_cpu
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
|
||||||
"""
|
|
||||||
class SWEDataArkawaA:
    """Two sets ("0" and "1") of h, hu and hv device arrays of identical size."""

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """Upload the initial data to the CL device, once per set."""
        def upload(field):
            return OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, field)

        initial = (h0, hu0, hv0)
        self.h0, self.hu0, self.hv0 = [upload(field) for field in initial]
        self.h1, self.hu1, self.hv1 = [upload(field) for field in initial]

    def swap(self):
        """Exchange the "0" and "1" arrays after a timestep has been completed."""
        for name in ("h", "hu", "hv"):
            first = getattr(self, name + "0")
            second = getattr(self, name + "1")
            setattr(self, name + "1", first)
            setattr(self, name + "0", second)

    def download(self, cl_queue):
        """Download the "0" arrays and return them as (h, hu, hv)."""
        sources = (self.h0, self.hu0, self.hv0)
        h_cpu, hu_cpu, hv_cpu = [array.download(cl_queue) for array in sources]
        return h_cpu, hu_cpu, hv_cpu
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
A class representing an Arakawa C type (staggered, u fluxes on east/west faces, v fluxes on north/south faces) grid
|
|
||||||
We use h as cell centers
|
|
||||||
"""
|
|
||||||
class SWEDataArkawaC:
    """Two sets ("0" and "1") of h, hu and hv device arrays.

    hu is allocated one cell wider (nx+1) with no halo in x, and hv one
    cell taller (ny+1) with no halo in y.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """Upload the initial data to the CL device, once per set."""
        #FIXME: This at least works for 0 and 1 ghost cells, but not convinced it generalizes
        assert(halo_x <= 1 and halo_y <= 1)

        def upload_set():
            # Construction order matches the attribute order h, hu, hv
            centers = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0)
            u_faces = OpenCLArray2D(cl_ctx, nx+1, ny, 0, halo_y, hu0)
            v_faces = OpenCLArray2D(cl_ctx, nx, ny+1, halo_x, 0, hv0)
            return centers, u_faces, v_faces

        self.h0, self.hu0, self.hv0 = upload_set()
        self.h1, self.hu1, self.hv1 = upload_set()

    def swap(self):
        """Exchange the "0" and "1" arrays after a timestep has been completed."""
        #h is assumed to be constant (bottom topography really); it is swapped all the same
        for name in ("h", "hu", "hv"):
            first = getattr(self, name + "0")
            second = getattr(self, name + "1")
            setattr(self, name + "1", first)
            setattr(self, name + "0", second)

    def download(self, cl_queue):
        """Download the "0" arrays and return them as (h, hu, hv)."""
        sources = (self.h0, self.hu0, self.hv0)
        h_cpu, hu_cpu, hv_cpu = [array.download(cl_queue) for array in sources]
        return h_cpu, hu_cpu, hv_cpu
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Class which represents different wind stresses
|
|
||||||
"""
|
|
||||||
class WindStressParams:
    """
    wind_type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
    wind_tau0: Amplitude of wind stress (Pa)
    wind_rho: Density of sea water (1025.0 kg / m^3)
    wind_alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
    wind_xm: Maximum wind stress for bell shaped wind stress
    wind_Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
    wind_x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
    wind_y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
    wind_u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
    wind_v0: Translation speed along y for moving cyclone (-0.5*u0)

    The constructor arguments carry the same names without the "wind_"
    prefix. ``type`` is stored as numpy int32, everything else as numpy
    float32.
    """
    def __init__(self,
                 type=99,  # "no wind"
                 tau0=0, rho=0, alpha=0, xm=0, Rc=0,
                 x0=0, y0=0,
                 u0=0, v0=0):
        self.type = np.int32(type)

        # Every remaining parameter is stored as a 32-bit float
        float_params = (
            ("tau0", tau0), ("rho", rho), ("alpha", alpha),
            ("xm", xm), ("Rc", Rc),
            ("x0", x0), ("y0", y0),
            ("u0", u0), ("v0", v0),
        )
        for name, value in float_params:
            setattr(self, name, np.float32(value))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,123 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
This python module implements saving shallow water simulations to a
|
|
||||||
netcdf file.
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from netCDF4 import Dataset
|
|
||||||
|
|
||||||
class CTCSNetCDFWriter:
    """Writes eta, u and v fields on a staggered grid to a NetCDF file.

    The file gets an unlimited 'time' dimension and a separate pair of
    spatial axes for every variable ('x_eta'/'y_eta', 'x_u'/'y_u',
    'x_v'/'y_v'). The writer can be used as a context manager, in which
    case the file is closed when the ``with`` block is left.
    """

    #Names of the spatial axes, in the order they are created in the file
    _AXIS_NAMES = ('x_eta', 'y_eta', 'x_u', 'y_u', 'x_v', 'y_v')

    def __init__(self, outfilename, nx, ny, dx, dy, ignore_ghostcells=True):
        """Create the file, its dimensions, axes and data variables.

        outfilename: Path of the NetCDF file, opened in 'w' mode
        nx: Number of cells along x-axis (not counting ghost cells)
        ny: Number of cells along y-axis (not counting ghost cells)
        dx: Grid cell spacing along x-axis
        dy: Grid cell spacing along y-axis
        ignore_ghostcells: If True, only the interior of each field is
            stored; otherwise the ghost cells are stored as well
        """
        self.ncfile = Dataset(outfilename, 'w')
        self.ignore_ghostcells = ignore_ghostcells

        ticks = self._axis_ticks(nx, ny, dx, dy, ignore_ghostcells)
        float32 = np.dtype('float32').char

        #Create dimensions
        self.ncfile.createDimension('time', None) #Unlimited time dimension
        for name in self._AXIS_NAMES:
            self.ncfile.createDimension(name, len(ticks[name]))

        #Create axes, set their values/ticks and units
        self.nc_time = self.ncfile.createVariable('time', float32, 'time')
        self.nc_time.units = 's'
        for name in self._AXIS_NAMES:
            axis = self.ncfile.createVariable(name, float32, name)
            axis[:] = ticks[name]
            axis.units = 'm'

        #Create output data variables
        self.nc_eta = self.ncfile.createVariable('eta', float32, ('time', 'y_eta', 'x_eta'))
        self.nc_u = self.ncfile.createVariable('u', float32, ('time', 'y_u', 'x_u'))
        self.nc_v = self.ncfile.createVariable('v', float32, ('time', 'y_v', 'x_v'))

        #Set units
        #NOTE(review): u and v are labelled 'm' like eta - confirm this is intended
        self.nc_eta.units = 'm'
        self.nc_u.units = 'm'
        self.nc_v.units = 'm'

    @staticmethod
    def _axis_ticks(nx, ny, dx, dy, ignore_ghostcells=True):
        """Return a dict mapping each axis name to its coordinate array.

        eta lives at cell centres, u on the faces normal to x and v on the
        faces normal to y. Without ghost cells only the interior faces
        dx ... (nx-1)*dx (resp. dy ... (ny-1)*dy) are kept for u (resp. v).
        """
        if ignore_ghostcells:
            return {
                'x_eta': np.linspace(dx/2.0, nx*dx - dx/2.0, nx),
                'y_eta': np.linspace(dy/2.0, ny*dy - dy/2.0, ny),
                #First interior face is one cell width in, i.e. at dx (not 1)
                'x_u': np.linspace(dx, (nx-1)*dx, nx-1),
                'y_u': np.linspace(dy/2.0, ny*dy - dy/2.0, ny),
                'x_v': np.linspace(dx/2.0, nx*dx - dx/2.0, nx),
                #First interior face is one cell height in, i.e. at dy (not 1)
                'y_v': np.linspace(dy, (ny-1)*dy, ny-1),
            }

        return {
            'x_eta': np.linspace(-dx/2.0, nx*dx + dx/2.0, nx+2),
            'y_eta': np.linspace(-dy/2.0, ny*dy + dy/2.0, ny+2),
            'x_u': np.linspace(0, nx*dx, nx+1),
            'y_u': np.linspace(-dy/2.0, ny*dy + dy/2.0, ny+2),
            'x_v': np.linspace(-dx/2.0, nx*dx + dx/2.0, nx+2),
            'y_v': np.linspace(0, ny*dy, ny+1),
        }

    def __enter__(self):
        """Return the writer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the NetCDF file; exceptions are not suppressed."""
        self.ncfile.close()

    def write(self, i, t, eta, u, v):
        """Store one snapshot at index i of the time dimension.

        i: Index along the unlimited time dimension
        t: Simulation time stored in the 'time' variable
        eta, u, v: 2D arrays including ghost cells; when ghost cells are
            ignored the outermost row and column on every side are
            stripped before writing
        """
        self.nc_time[i] = t

        if self.ignore_ghostcells:
            eta = eta[1:-1, 1:-1]
            u = u[1:-1, 1:-1]
            v = v[1:-1, 1:-1]

        self.nc_eta[i, :] = eta
        self.nc_u[i, :] = u
        self.nc_v[i, :] = v
|
|
@ -1,186 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
This python module implements the Forward Backward Linear numerical
|
|
||||||
scheme for the shallow water equations, described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
"""
|
|
||||||
|
|
||||||
#Import packages we need
|
|
||||||
import numpy as np
|
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
|
||||||
"""
|
|
||||||
class FBL:
    """Forward-Backward linear solver for the shallow water equations.

    Owns an OpenCL command queue, the three compiled kernels (U, V and eta)
    and the device-side copies of the input fields. Each time step enqueues
    the U kernel, then the V kernel, then the eta kernel.
    """

    def __init__(self,
                 cl_ctx,
                 H, eta0, hu0, hv0,
                 nx, ny,
                 dx, dy, dt,
                 g, f, r,
                 wind_stress=Common.WindStressParams(),
                 block_width=16, block_height=16):
        """Initialization routine.

        cl_ctx: OpenCL context used for the queue, kernels and buffers
        H: Water depth incl ghost cells, (nx+2)*(ny+2) cells
        eta0: Initial deviation from mean sea level incl ghost cells, (nx+2)*(ny+2) cells
        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+2) cells
        hv0: Initial momentum along y-axis incl ghost cells, (nx+2)*(ny+1) cells
        nx: Number of cells along x-axis
        ny: Number of cells along y-axis
        dx: Grid cell spacing along x-axis (20 000 m)
        dy: Grid cell spacing along y-axis (20 000 m)
        dt: Size of each timestep (90 s)
        g: Gravitational acceleration (9.81 m/s^2)
        f: Coriolis parameter (1.2e-4 s^-1)
        r: Bottom friction coefficient (2.4e-3 m/s)
        wind_stress: Wind stress parameters
        block_width: Work-group width; also passed to the kernel build
        block_height: Work-group height; also passed to the kernel build

        NOTE(review): the default wind_stress object is created once, when
        the class is defined, and shared by every instance that relies on it.
        """
        self.cl_ctx = cl_ctx

        #Create an OpenCL command queue
        self.cl_queue = cl.CommandQueue(self.cl_ctx)

        #Get kernels
        self.u_kernel = Common.get_kernel(self.cl_ctx, "FBL_U_kernel.opencl", block_width, block_height)
        self.v_kernel = Common.get_kernel(self.cl_ctx, "FBL_V_kernel.opencl", block_width, block_height)
        self.eta_kernel = Common.get_kernel(self.cl_ctx, "FBL_eta_kernel.opencl", block_width, block_height)

        #Create data by uploading to device
        ghost_cells_x = 0
        ghost_cells_y = 0
        self.H = Common.OpenCLArray2D(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, H)
        self.cl_data = Common.SWEDataArkawaC(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, eta0, hu0, hv0)

        #Save input parameters
        #Notice that we need to specify them in the correct dataformat for the
        #OpenCL kernel
        self.nx = np.int32(nx)
        self.ny = np.int32(ny)
        self.dx = np.float32(dx)
        self.dy = np.float32(dy)
        self.dt = np.float32(dt)
        self.g = np.float32(g)
        self.f = np.float32(f)
        self.r = np.float32(r)
        self.wind_stress = wind_stress

        #Initialize time
        self.t = np.float32(0.0)

        #Compute kernel launch parameters
        #The work-group size must match the block_width/block_height the
        #kernels were built with, so use the very same values here.
        self.local_size = (block_width, block_height)
        #The same launch grid is shared by all three kernels. The U kernel
        #handles column indices up to and including nx and the V kernel row
        #indices up to and including ny, so the grid must cover nx+1 by ny+1
        #work-items; the kernels guard their own reads and writes.
        self.global_size = (
            self._round_up(int(nx) + 1, self.local_size[0]),
            self._round_up(int(ny) + 1, self.local_size[1])
        )

    @staticmethod
    def _round_up(count, multiple):
        """Return the smallest multiple of `multiple` that is >= `count`."""
        return int(np.ceil(count / float(multiple))) * multiple

    def __str__(self):
        return "Forward Backward Linear"

    def step(self, t_end=0.0):
        """Advance the simulation by t_end seconds of simulated time.

        The interval is covered with steps of at most self.dt; the final
        step is shortened so that exactly t_end is covered. Returns the
        accumulated simulation time self.t.
        """
        n = int(t_end / self.dt + 1)

        #Kernel arguments that do not change between time steps
        fields = (
            self.H.data, self.H.pitch,
            self.cl_data.hu0.data, self.cl_data.hu0.pitch,
            self.cl_data.hv0.data, self.cl_data.hv0.pitch,
            self.cl_data.h0.data, self.cl_data.h0.pitch,
        )
        wind = (
            self.wind_stress.type,
            self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc,
            self.wind_stress.x0, self.wind_stress.y0,
            self.wind_stress.u0, self.wind_stress.v0,
        )

        for i in range(0, n):
            local_dt = np.float32(min(self.dt, t_end - i*self.dt))

            #Nothing left to simulate
            if local_dt <= 0.0:
                break

            discretization = (
                self.nx, self.ny,
                self.dx, self.dy, local_dt,
                self.g, self.f, self.r,
            )
            #U and V kernels additionally take the wind stress and current time
            momentum_args = discretization + fields + wind + (self.t,)
            eta_args = discretization + fields

            self.u_kernel.computeUKernel(self.cl_queue, self.global_size, self.local_size, *momentum_args)
            self.v_kernel.computeVKernel(self.cl_queue, self.global_size, self.local_size, *momentum_args)
            self.eta_kernel.computeEtaKernel(self.cl_queue, self.global_size, self.local_size, *eta_args)

            self.t += local_dt

        return self.t

    def download(self):
        """Return whatever the device data container downloads via the queue."""
        return self.cl_data.download(self.cl_queue)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,163 +0,0 @@
|
|||||||
/*
|
|
||||||
This OpenCL kernel implements part of the Forward Backward Linear
|
|
||||||
numerical scheme for the shallow water equations, described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include "common.opencl"
|
|
||||||
|
|
||||||
|
|
||||||
/**
  * Kernel that evolves U one step in time.
  *
  * Each work-item updates the U value at column get_global_id(0) and row
  * get_global_id(1). H, eta and V are first staged in __local tiles that are
  * shifted one column to the left, so that tile column tx holds cell ti-1
  * and tile column tx+1 holds cell ti. All pitches are byte strides between
  * consecutive rows (rows are addressed through a char pointer).
  */
__kernel void computeUKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* U_ptr_, int U_pitch_,
        __global float* V_ptr_, int V_pitch_,
        __global float* eta_ptr_, int eta_pitch_,

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    __local float H_shared[block_height][block_width+1];
    __local float V_shared[block_height+1][block_width+1];
    __local float eta_shared[block_height][block_width+1];

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Index of block within domain
    const int bx = get_local_size(0) * get_group_id(0);
    const int by = get_local_size(1) * get_group_id(1);

    //Index of cell within domain
    const int ti = get_global_id(0);
    const int tj = get_global_id(1);

    //Compute pointer to row "tj" in the U array
    __global float* const U_row = (__global float*) ((__global char*) U_ptr_ + U_pitch_*tj);

    //Read current U
    float U_current = 0.0f;
    if (ti < nx_ + 1 && tj < ny_) {
        U_current = U_row[ti];
    }

    //Read H and eta into local memory
    for (int j=ty; j<block_height; j+=get_local_size(1)) {
        const int l = by + j;

        //Compute the pointer to row "l" in the H and eta arrays
        __global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
        __global float* const eta_row = (__global float*) ((__global char*) eta_ptr_ + eta_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const int k = bx + i - 1;

            //Tile row j is only consumed by the work-item with tj == l, and
            //that work-item writes nothing unless tj < ny_. Rows l >= ny_
            //are therefore zero-filled instead of being read.
            if (k >= 0 && k < nx_ && l < ny_) {
                H_shared[j][i] = H_row[k];
                eta_shared[j][i] = eta_row[k];
            }
            else {
                H_shared[j][i] = 0.0f;
                eta_shared[j][i] = 0.0f;
            }
        }
    }

    //Read V into local memory (one extra row: rows tj and tj+1 are needed)
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const int l = by + j;

        //Compute the pointer to row "l" in the V array
        __global float* const V_row = (__global float*) ((__global char*) V_ptr_ + V_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const int k = bx + i - 1;

            if (k >= 0 && k < nx_ && l < ny_+1) {
                V_shared[j][i] = V_row[k];
            }
            else {
                V_shared[j][i] = 0.0f;
            }
        }
    }

    //Make sure all threads have read into local memory
    barrier(CLK_LOCAL_MEM_FENCE);

    //Reconstruct H at the U position
    float H_m = 0.5f*(H_shared[ty][tx] + H_shared[ty][tx+1]);

    //Reconstruct V at the U position
    //(only one V row is averaged on the first and last row of the domain)
    float V_m = 0.0f;
    if (tj==0) {
        V_m = 0.5f*(V_shared[ty+1][tx] + V_shared[ty+1][tx+1]);
    }
    else if (tj==ny_-1) {
        V_m = 0.5f*(V_shared[ty][tx] + V_shared[ty][tx+1]);
    }
    else {
        V_m = 0.25f*(V_shared[ty][tx] + V_shared[ty][tx+1]
                + V_shared[ty+1][tx] + V_shared[ty+1][tx+1]);
    }

    //Calculate the friction coefficient
    float B = H_m/(H_m + r_*dt_);

    //Calculate the gravitational effect
    float P = g_*H_m*(eta_shared[ty][tx] - eta_shared[ty][tx+1])/dx_;

    //Calculate the wind shear stress
    float X = windStressX(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute the U at the next timestep
    float U_next = B*(U_current + dt_*(f_*V_m + P + X) );

    //Write to main memory for cells inside the U array
    if (ti < nx_+1 && tj < ny_) {
        //Closed boundaries
        if (ti == 0 || ti == nx_) {
            U_next = 0.0f;
        }
        U_row[ti] = U_next;
    }
}
|
|
@ -1,168 +0,0 @@
|
|||||||
/*
|
|
||||||
This OpenCL kernel implements part of the Forward Backward Linear
|
|
||||||
numerical scheme for the shallow water equations, described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
  * Kernel that evolves V one step in time.
  *
  * Each work-item updates the V value at column get_global_id(0) and row
  * get_global_id(1). H, eta and U are first staged in __local tiles that are
  * shifted one row up, so that tile row ty holds row tj-1 and tile row ty+1
  * holds row tj. All pitches are byte strides between consecutive rows
  * (rows are addressed through a char pointer).
  */
__kernel void computeVKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* U_ptr_, int U_pitch_,
        __global float* V_ptr_, int V_pitch_,
        __global float* eta_ptr_, int eta_pitch_,

        // Wind stress parameters
        int wind_stress_type_,
        float tau0_, float rho_, float alpha_, float xm_, float Rc_,
        float x0_, float y0_,
        float u0_, float v0_,
        float t_) {

    __local float H_shared[block_height+1][block_width];
    __local float U_shared[block_height+1][block_width+1];
    __local float eta_shared[block_height+1][block_width];

    //Index of thread within block
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Index of block within domain
    const int bx = get_local_size(0) * get_group_id(0);
    const int by = get_local_size(1) * get_group_id(1);

    //Index of cell within domain
    const int ti = get_global_id(0);
    const int tj = get_global_id(1);

    //Compute pointer to row "tj" in the V array
    __global float* const V_row = (__global float*) ((__global char*) V_ptr_ + V_pitch_*tj);

    //Read current V
    float V_current = 0.0f;
    if (ti < nx_ && tj < ny_+1) {
        V_current = V_row[ti];
    }

    //Read H and eta into local memory
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const int l = by + j - 1;

        //Compute the pointer to row "l" in the H and eta arrays
        __global float* const H_row = (__global float*) ((__global char*) H_ptr_ + H_pitch_*l);
        __global float* const eta_row = (__global float*) ((__global char*) eta_ptr_ + eta_pitch_*l);

        for (int i=tx; i<block_width; i+=get_local_size(0)) {
            const int k = bx + i;

            //Row l == ny_ is only consumed by the work-items with tj == ny_,
            //whose result is forced to zero by the closed boundary below.
            //Rows l >= ny_ are therefore zero-filled instead of being read.
            if (k < nx_ && l >= 0 && l < ny_) {
                H_shared[j][i] = H_row[k];
                eta_shared[j][i] = eta_row[k];
            }
            else {
                H_shared[j][i] = 0.0f;
                eta_shared[j][i] = 0.0f;
            }
        }
    }

    //Read U into local memory (one extra column: columns ti and ti+1 are needed)
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const int l = by + j - 1;

        //Compute the pointer to row "l" in the U array
        __global float* const U_row = (__global float*) ((__global char*) U_ptr_ + U_pitch_*l);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const int k = bx + i;

            if (k < nx_+1 && l >= 0 && l < ny_) {
                U_shared[j][i] = U_row[k];
            }
            else {
                U_shared[j][i] = 0.0f;
            }
        }
    }

    //Make sure all threads have read into local memory
    barrier(CLK_LOCAL_MEM_FENCE);

    //Reconstruct H at the V position
    float H_m = 0.5f*(H_shared[ty][tx] + H_shared[ty+1][tx]);

    //Reconstruct U at the V position
    //(only one U column is averaged on the first and last column of the domain)
    float U_m;
    if (ti==0) {
        U_m = 0.5f*(U_shared[ty][tx+1] + U_shared[ty+1][tx+1]);
    }
    else if (ti==nx_-1) {
        U_m = 0.5f*(U_shared[ty][tx] + U_shared[ty+1][tx]);
    }
    else {
        U_m = 0.25f*(U_shared[ty][tx] + U_shared[ty][tx+1]
                + U_shared[ty+1][tx] + U_shared[ty+1][tx+1]);
    }

    //Calculate the friction coefficient
    float B = H_m/(H_m + r_*dt_);

    //Calculate the gravitational effect
    float P = g_*H_m*(eta_shared[ty][tx] - eta_shared[ty+1][tx])/dy_;

    //Calculate the wind shear stress
    float Y = windStressY(
        wind_stress_type_,
        dx_, dy_, dt_,
        tau0_, rho_, alpha_, xm_, Rc_,
        x0_, y0_,
        u0_, v0_,
        t_);

    //Compute the V at the next timestep
    float V_next = B*(V_current + dt_*(-f_*U_m + P + Y) );

    //Write to main memory
    if (ti < nx_ && tj < ny_+1) {
        //Closed boundaries
        if (tj == 0) {
            V_next = 0.0f;
        }
        else if (tj == ny_) {
            V_next = 0.0f;
        }

        V_row[ti] = V_next;
    }
}
|
|
@ -1,113 +0,0 @@
|
|||||||
/*
|
|
||||||
This OpenCL kernel implements part of the Forward Backward Linear
|
|
||||||
numerical scheme for the shallow water equations, described in
|
|
||||||
L. P. Røed, "Documentation of simple ocean models for use in ensemble
|
|
||||||
predictions", Met no report 2012/3 and 2012/5 .
|
|
||||||
|
|
||||||
Copyright (C) 2016 SINTEF ICT
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
  * Kernel that evolves eta one step in time.
  *
  * Every work-item owns the eta value at (get_global_id(0), get_global_id(1)).
  * U and V are staged in __local tiles first; eta is then updated from the
  * difference of U across the cell in x and of V across the cell in y.
  * Pitches are byte strides between rows (rows are addressed through a
  * char pointer). g_, f_, r_ and H are part of the signature but not used.
  */
__kernel void computeEtaKernel(
        //Discretization parameters
        int nx_, int ny_,
        float dx_, float dy_, float dt_,

        //Physical parameters
        float g_, //< Gravitational constant
        float f_, //< Coriolis coefficient
        float r_, //< Bottom friction coefficient

        //Data
        __global float* H_ptr_, int H_pitch_,
        __global float* U_ptr_, int U_pitch_,
        __global float* V_ptr_, int V_pitch_,
        __global float* eta_ptr_, int eta_pitch_) {

    //Position of this work-item inside its work-group
    const int tx = get_local_id(0);
    const int ty = get_local_id(1);

    //Domain coordinates of the first cell of this work-group
    const int bx = get_local_size(0) * get_group_id(0);
    const int by = get_local_size(1) * get_group_id(1);

    //Domain coordinates of the cell owned by this work-item
    const int ti = get_global_id(0);
    const int tj = get_global_id(1);

    __local float U_shared[block_height][block_width+1];
    __local float V_shared[block_height+1][block_width];

    //Row "tj" of the eta array
    __global float* const eta_row = (__global float*) ((__global char*) eta_ptr_ + eta_pitch_*tj);

    //Only cells inside the domain are read and (further down) written
    const bool inside = (ti < nx_ && tj < ny_);
    const float eta_current = inside ? eta_row[ti] : 0.0f;

    //Stage U: same rows as the block, one extra column on the right
    for (int j=ty; j<block_height; j+=get_local_size(1)) {
        const unsigned int row = by + j;
        __global float* const U_row = (__global float*) ((__global char*) U_ptr_ + U_pitch_*row);

        for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
            const unsigned int col = bx + i;
            //Zero-fill everything outside the U array
            U_shared[j][i] = (col < nx_+1 && row < ny_) ? U_row[col] : 0.0f;
        }
    }

    //Stage V: same columns as the block, one extra row at the end
    for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
        const unsigned int row = by + j;
        __global float* const V_row = (__global float*) ((__global char*) V_ptr_ + V_pitch_*row);

        for (int i=tx; i<block_width; i+=get_local_size(0)) {
            const unsigned int col = bx + i;
            //Zero-fill everything outside the V array
            V_shared[j][i] = (col < nx_ && row < ny_+1) ? V_row[col] : 0.0f;
        }
    }

    //Both tiles must be complete before any work-item consumes them
    barrier(CLK_LOCAL_MEM_FENCE);

    //Differences of U and V across this cell
    const float dU = U_shared[ty][tx+1] - U_shared[ty][tx];
    const float dV = V_shared[ty+1][tx] - V_shared[ty][tx];

    //eta at the next timestep
    const float eta_next = eta_current - dt_/dx_ * dU
                                       - dt_/dy_ * dV;

    //Write back to main memory
    if (inside) {
        eta_row[ti] = eta_next;
    }
}
|
|
@ -22,7 +22,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
from SWESimulators import Common
|
from SWESimulators import Common
|
||||||
|
|
||||||
|
|
||||||
@ -53,24 +57,27 @@ class FORCE:
|
|||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
cl_ctx, \
|
context, \
|
||||||
h0, hu0, hv0, \
|
h0, hu0, hv0, \
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
self.cl_ctx = cl_ctx
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
#Create an OpenCL command queue
|
|
||||||
self.cl_queue = cl.CommandQueue(self.cl_ctx)
|
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.kernel = Common.get_kernel(self.cl_ctx, "FORCE_kernel.opencl", block_width, block_height)
|
self.force_module = context.get_kernel("FORCE_kernel.cu", block_width, block_height)
|
||||||
|
self.force_kernel = self.force_module.get_function("FORCEKernel")
|
||||||
|
self.force_kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
ghost_cells_x = 1
|
ghost_cells_x = 1
|
||||||
ghost_cells_y = 1
|
ghost_cells_y = 1
|
||||||
self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
ghost_cells_x, ghost_cells_y, \
|
||||||
|
h0, hu0, hv0)
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
@ -86,10 +93,10 @@ class FORCE:
|
|||||||
self.t = np.float32(0.0)
|
self.t = np.float32(0.0)
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
#Compute kernel launch parameters
|
||||||
self.local_size = (block_width, block_height)
|
self.local_size = (block_width, block_height, 1)
|
||||||
self.global_size = ( \
|
self.global_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -109,20 +116,20 @@ class FORCE:
|
|||||||
if (local_dt <= 0.0):
|
if (local_dt <= 0.0):
|
||||||
break
|
break
|
||||||
|
|
||||||
self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.force_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
|
||||||
self.t += local_dt
|
self.t += local_dt
|
||||||
|
|
||||||
self.cl_data.swap()
|
self.data.swap()
|
||||||
|
|
||||||
return self.t
|
return self.t
|
||||||
|
|
||||||
@ -131,5 +138,5 @@ class FORCE:
|
|||||||
|
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.cl_data.download(self.cl_queue)
|
return self.data.download(self.stream)
|
||||||
|
|
||||||
|
@ -19,14 +19,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
#include "common.cu"
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxF(__local float Q[3][block_height+2][block_width+2],
|
__device__
|
||||||
__local float F[3][block_height+1][block_width+1],
|
void computeFluxF(float Q[3][block_height+2][block_width+2],
|
||||||
|
float F[3][block_height+1][block_width+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
|
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -34,16 +35,17 @@ void computeFluxF(__local float Q[3][block_height+2][block_width+2],
|
|||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Compute fluxes along the x axis
|
//Compute fluxes along the x axis
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
{
|
||||||
|
int j=ty;
|
||||||
const int l = j + 1; //Skip ghost cells
|
const int l = j + 1; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+1; i+=block_width) {
|
||||||
const int k = i;
|
const int k = i;
|
||||||
|
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
const float3 Qp = (float3)(Q[0][l][k+1],
|
const float3 Qp = make_float3(Q[0][l][k+1],
|
||||||
Q[1][l][k+1],
|
Q[1][l][k+1],
|
||||||
Q[2][l][k+1]);
|
Q[2][l][k+1]);
|
||||||
const float3 Qm = (float3)(Q[0][l][k],
|
const float3 Qm = make_float3(Q[0][l][k],
|
||||||
Q[1][l][k],
|
Q[1][l][k],
|
||||||
Q[2][l][k]);
|
Q[2][l][k]);
|
||||||
|
|
||||||
@ -54,32 +56,33 @@ void computeFluxF(__local float Q[3][block_height+2][block_width+2],
|
|||||||
F[2][j][i] = flux.z;
|
F[2][j][i] = flux.z;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the flux along the y axis for all faces
|
* Computes the flux along the y axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxG(__local float Q[3][block_height+2][block_width+2],
|
__device__
|
||||||
__local float G[3][block_height+1][block_width+1],
|
void computeFluxG(float Q[3][block_height+2][block_width+2],
|
||||||
|
float G[3][block_height+1][block_width+1],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Compute fluxes along the y axis
|
//Compute fluxes along the y axis
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+1; j+=block_height) {
|
||||||
const int l = j;
|
const int l = j;
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
{
|
||||||
|
int i=tx;
|
||||||
const int k = i + 1; //Skip ghost cells
|
const int k = i + 1; //Skip ghost cells
|
||||||
|
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
// Note that we swap hu and hv
|
// Note that we swap hu and hv
|
||||||
const float3 Qp = (float3)(Q[0][l+1][k],
|
const float3 Qp = make_float3(Q[0][l+1][k],
|
||||||
Q[2][l+1][k],
|
Q[2][l+1][k],
|
||||||
Q[1][l+1][k]);
|
Q[1][l+1][k]);
|
||||||
const float3 Qm = (float3)(Q[0][l][k],
|
const float3 Qm = make_float3(Q[0][l][k],
|
||||||
Q[2][l][k],
|
Q[2][l][k],
|
||||||
Q[1][l][k]);
|
Q[1][l][k]);
|
||||||
|
|
||||||
@ -91,39 +94,26 @@ void computeFluxG(__local float Q[3][block_height+2][block_width+2],
|
|||||||
G[2][j][i] = flux.y;
|
G[2][j][i] = flux.y;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__kernel void swe_2D(
|
__global__ void FORCEKernel(
|
||||||
int nx_, int ny_,
|
int nx_, int ny_,
|
||||||
float dx_, float dy_, float dt_,
|
float dx_, float dy_, float dt_,
|
||||||
float g_,
|
float g_,
|
||||||
|
|
||||||
//Input h^n
|
//Input h^n
|
||||||
__global float* h0_ptr_, int h0_pitch_,
|
float* h0_ptr_, int h0_pitch_,
|
||||||
__global float* hu0_ptr_, int hu0_pitch_,
|
float* hu0_ptr_, int hu0_pitch_,
|
||||||
__global float* hv0_ptr_, int hv0_pitch_,
|
float* hv0_ptr_, int hv0_pitch_,
|
||||||
|
|
||||||
//Output h^{n+1}
|
//Output h^{n+1}
|
||||||
__global float* h1_ptr_, int h1_pitch_,
|
float* h1_ptr_, int h1_pitch_,
|
||||||
__global float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
__global float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
|
|
||||||
//Index of thread within block
|
__shared__ float Q[3][block_height+2][block_width+2];
|
||||||
const int tx = get_local_id(0);
|
__shared__ float F[3][block_height+1][block_width+1];
|
||||||
const int ty = get_local_id(1);
|
|
||||||
|
|
||||||
//Index of block within domain
|
|
||||||
const int bx = get_local_size(0) * get_group_id(0);
|
|
||||||
const int by = get_local_size(1) * get_group_id(1);
|
|
||||||
|
|
||||||
//Index of cell within domain
|
|
||||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
|
||||||
const int tj = get_global_id(1) + 1;
|
|
||||||
|
|
||||||
__local float Q[3][block_height+2][block_width+2];
|
|
||||||
__local float F[3][block_height+1][block_width+1];
|
|
||||||
|
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
@ -131,34 +121,28 @@ __kernel void swe_2D(
|
|||||||
hu0_ptr_, hu0_pitch_,
|
hu0_ptr_, hu0_pitch_,
|
||||||
hv0_ptr_, hv0_pitch_,
|
hv0_ptr_, hv0_pitch_,
|
||||||
Q, nx_, ny_);
|
Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Save our input variables
|
|
||||||
const float h0 = Q[0][ty+1][tx+1];
|
|
||||||
const float hu0 = Q[1][ty+1][tx+1];
|
|
||||||
const float hv0 = Q[2][ty+1][tx+1];
|
|
||||||
|
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary1(Q, nx_, ny_);
|
noFlowBoundary1(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute flux along x, and evolve
|
//Compute flux along x, and evolve
|
||||||
computeFluxF(Q, F, g_, dx_, dt_);
|
computeFluxF(Q, F, g_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveF1(Q, F, nx_, ny_, dx_, dt_);
|
evolveF1(Q, F, nx_, ny_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary1(Q, nx_, ny_);
|
noFlowBoundary1(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute flux along y, and evolve
|
//Compute flux along y, and evolve
|
||||||
computeFluxG(Q, F, g_, dy_, dt_);
|
computeFluxG(Q, F, g_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveG1(Q, F, nx_, ny_, dy_, dt_);
|
evolveG1(Q, F, nx_, ny_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Write to main memory
|
//Write to main memory
|
||||||
writeBlock1(h1_ptr_, h1_pitch_,
|
writeBlock1(h1_ptr_, h1_pitch_,
|
@ -21,9 +21,12 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
|
from SWESimulators import Common
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -39,8 +42,8 @@ class HLL:
|
|||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
|
h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
|
||||||
u0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
|
hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
|
||||||
v0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
|
hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
|
||||||
nx: Number of cells along x-axis
|
nx: Number of cells along x-axis
|
||||||
ny: Number of cells along y-axis
|
ny: Number of cells along y-axis
|
||||||
dx: Grid cell spacing along x-axis (20 000 m)
|
dx: Grid cell spacing along x-axis (20 000 m)
|
||||||
@ -49,24 +52,27 @@ class HLL:
|
|||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
cl_ctx,
|
context, \
|
||||||
h0, u0, v0, \
|
h0, hu0, hv0, \
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
self.cl_ctx = cl_ctx
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
#Create an OpenCL command queue
|
|
||||||
self.cl_queue = cl.CommandQueue(self.cl_ctx)
|
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.lxf_kernel = Common.get_kernel(self.cl_ctx, "HLL_kernel.opencl", block_width, block_height)
|
self.hll_module = context.get_kernel("HLL_kernel.cu", block_width, block_height)
|
||||||
|
self.hll_kernel = self.hll_module.get_function("HLLKernel")
|
||||||
|
self.hll_kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
ghost_cells_x = 1
|
ghost_cells_x = 1
|
||||||
ghost_cells_y = 1
|
ghost_cells_y = 1
|
||||||
self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, u0, v0)
|
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
ghost_cells_x, ghost_cells_y, \
|
||||||
|
h0, hu0, hv0)
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
@ -82,10 +88,10 @@ class HLL:
|
|||||||
self.t = np.float32(0.0)
|
self.t = np.float32(0.0)
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
#Compute kernel launch parameters
|
||||||
self.local_size = (block_width, block_height)
|
self.local_size = (block_width, block_height, 1)
|
||||||
self.global_size = ( \
|
self.global_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -105,20 +111,20 @@ class HLL:
|
|||||||
if (local_dt <= 0.0):
|
if (local_dt <= 0.0):
|
||||||
break
|
break
|
||||||
|
|
||||||
self.lxf_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.hll_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
|
||||||
self.t += local_dt
|
self.t += local_dt
|
||||||
|
|
||||||
self.cl_data.swap()
|
self.data.swap()
|
||||||
|
|
||||||
return self.t
|
return self.t
|
||||||
|
|
||||||
@ -127,5 +133,5 @@ class HLL:
|
|||||||
|
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.cl_data.download(self.cl_queue)
|
return self.data.download(self.stream)
|
||||||
|
|
||||||
|
@ -21,7 +21,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
from SWESimulators import Common
|
from SWESimulators import Common
|
||||||
|
|
||||||
|
|
||||||
@ -50,25 +54,28 @@ class HLL2:
|
|||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
cl_ctx, \
|
context, \
|
||||||
h0, hu0, hv0, \
|
h0, hu0, hv0, \
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
theta=1.8, \
|
theta=1.8, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
self.cl_ctx = cl_ctx
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
#Create an OpenCL command queue
|
|
||||||
self.cl_queue = cl.CommandQueue(self.cl_ctx)
|
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.swe_kernel = Common.get_kernel(self.cl_ctx, "HLL2_kernel.opencl", block_width, block_height)
|
self.hll2_module = context.get_kernel("HLL2_kernel.cu", block_width, block_height)
|
||||||
|
self.hll2_kernel = self.hll2_module.get_function("HLL2Kernel")
|
||||||
|
self.hll2_kernel.prepare("iifffffiPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
ghost_cells_x = 2
|
ghost_cells_x = 2
|
||||||
ghost_cells_y = 2
|
ghost_cells_y = 2
|
||||||
self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
ghost_cells_x, ghost_cells_y, \
|
||||||
|
h0, hu0, hv0)
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
@ -85,15 +92,15 @@ class HLL2:
|
|||||||
self.t = np.float32(0.0)
|
self.t = np.float32(0.0)
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
#Compute kernel launch parameters
|
||||||
self.local_size = (block_width, block_height)
|
self.local_size = (block_width, block_height, 1)
|
||||||
self.global_size = ( \
|
self.global_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Harten-Lax-van Leer contact discontinuity"
|
return "Harten-Lax-van Leer (2nd order)"
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -111,34 +118,34 @@ class HLL2:
|
|||||||
break
|
break
|
||||||
|
|
||||||
#Along X, then Y
|
#Along X, then Y
|
||||||
self.swe_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.hll2_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.cl_data.swap()
|
self.data.swap()
|
||||||
|
|
||||||
#Along Y, then X
|
#Along Y, then X
|
||||||
self.swe_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.hll2_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.cl_data.swap()
|
self.data.swap()
|
||||||
|
|
||||||
self.t += local_dt
|
self.t += local_dt
|
||||||
|
|
||||||
@ -148,5 +155,5 @@ class HLL2:
|
|||||||
|
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.cl_data.download(self.cl_queue)
|
return self.data.download(self.stream)
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
#include "common.cu"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -29,31 +29,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
/**
|
/**
|
||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxF(__local float Q[3][block_height+4][block_width+4],
|
__device__
|
||||||
__local float Qx[3][block_height+2][block_width+2],
|
void computeFluxF(float Q[3][block_height+4][block_width+4],
|
||||||
__local float F[3][block_height+1][block_width+1],
|
float Qx[3][block_height+2][block_width+2],
|
||||||
|
float F[3][block_height+1][block_width+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
{
|
||||||
|
const int j=ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+1; i+=block_width) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
// Reconstruct point values of Q at the left and right hand side
|
// Reconstruct point values of Q at the left and right hand side
|
||||||
// of the cell for both the left (i) and right (i+1) cell
|
// of the cell for both the left (i) and right (i+1) cell
|
||||||
const float3 Q_rl = (float3)(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
|
const float3 Q_rl = make_float3(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
|
||||||
Q[1][l][k+1] - 0.5f*Qx[1][j][i+1],
|
Q[1][l][k+1] - 0.5f*Qx[1][j][i+1],
|
||||||
Q[2][l][k+1] - 0.5f*Qx[2][j][i+1]);
|
Q[2][l][k+1] - 0.5f*Qx[2][j][i+1]);
|
||||||
const float3 Q_rr = (float3)(Q[0][l][k+1] + 0.5f*Qx[0][j][i+1],
|
const float3 Q_rr = make_float3(Q[0][l][k+1] + 0.5f*Qx[0][j][i+1],
|
||||||
Q[1][l][k+1] + 0.5f*Qx[1][j][i+1],
|
Q[1][l][k+1] + 0.5f*Qx[1][j][i+1],
|
||||||
Q[2][l][k+1] + 0.5f*Qx[2][j][i+1]);
|
Q[2][l][k+1] + 0.5f*Qx[2][j][i+1]);
|
||||||
|
|
||||||
const float3 Q_ll = (float3)(Q[0][l][k] - 0.5f*Qx[0][j][i],
|
const float3 Q_ll = make_float3(Q[0][l][k] - 0.5f*Qx[0][j][i],
|
||||||
Q[1][l][k] - 0.5f*Qx[1][j][i],
|
Q[1][l][k] - 0.5f*Qx[1][j][i],
|
||||||
Q[2][l][k] - 0.5f*Qx[2][j][i]);
|
Q[2][l][k] - 0.5f*Qx[2][j][i]);
|
||||||
const float3 Q_lr = (float3)(Q[0][l][k] + 0.5f*Qx[0][j][i],
|
const float3 Q_lr = make_float3(Q[0][l][k] + 0.5f*Qx[0][j][i],
|
||||||
Q[1][l][k] + 0.5f*Qx[1][j][i],
|
Q[1][l][k] + 0.5f*Qx[1][j][i],
|
||||||
Q[2][l][k] + 0.5f*Qx[2][j][i]);
|
Q[2][l][k] + 0.5f*Qx[2][j][i]);
|
||||||
|
|
||||||
@ -79,32 +81,34 @@ void computeFluxF(__local float Q[3][block_height+4][block_width+4],
|
|||||||
/**
|
/**
|
||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxG(__local float Q[3][block_height+4][block_width+4],
|
__device__
|
||||||
__local float Qy[3][block_height+2][block_width+2],
|
void computeFluxG(float Q[3][block_height+4][block_width+4],
|
||||||
__local float G[3][block_height+1][block_width+1],
|
float Qy[3][block_height+2][block_width+2],
|
||||||
|
float G[3][block_height+1][block_width+1],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+1; j+=block_height) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
{
|
||||||
|
int i=tx;
|
||||||
const int k = i + 2; //Skip ghost cells
|
const int k = i + 2; //Skip ghost cells
|
||||||
// Reconstruct point values of Q at the left and right hand side
|
// Reconstruct point values of Q at the left and right hand side
|
||||||
// of the cell for both the left (i) and right (i+1) cell
|
// of the cell for both the left (i) and right (i+1) cell
|
||||||
//NOte that hu and hv are swapped ("transposing" the domain)!
|
//NOte that hu and hv are swapped ("transposing" the domain)!
|
||||||
const float3 Q_rl = (float3)(Q[0][l+1][k] - 0.5f*Qy[0][j+1][i],
|
const float3 Q_rl = make_float3(Q[0][l+1][k] - 0.5f*Qy[0][j+1][i],
|
||||||
Q[2][l+1][k] - 0.5f*Qy[2][j+1][i],
|
Q[2][l+1][k] - 0.5f*Qy[2][j+1][i],
|
||||||
Q[1][l+1][k] - 0.5f*Qy[1][j+1][i]);
|
Q[1][l+1][k] - 0.5f*Qy[1][j+1][i]);
|
||||||
const float3 Q_rr = (float3)(Q[0][l+1][k] + 0.5f*Qy[0][j+1][i],
|
const float3 Q_rr = make_float3(Q[0][l+1][k] + 0.5f*Qy[0][j+1][i],
|
||||||
Q[2][l+1][k] + 0.5f*Qy[2][j+1][i],
|
Q[2][l+1][k] + 0.5f*Qy[2][j+1][i],
|
||||||
Q[1][l+1][k] + 0.5f*Qy[1][j+1][i]);
|
Q[1][l+1][k] + 0.5f*Qy[1][j+1][i]);
|
||||||
|
|
||||||
const float3 Q_ll = (float3)(Q[0][l][k] - 0.5f*Qy[0][j][i],
|
const float3 Q_ll = make_float3(Q[0][l][k] - 0.5f*Qy[0][j][i],
|
||||||
Q[2][l][k] - 0.5f*Qy[2][j][i],
|
Q[2][l][k] - 0.5f*Qy[2][j][i],
|
||||||
Q[1][l][k] - 0.5f*Qy[1][j][i]);
|
Q[1][l][k] - 0.5f*Qy[1][j][i]);
|
||||||
const float3 Q_lr = (float3)(Q[0][l][k] + 0.5f*Qy[0][j][i],
|
const float3 Q_lr = make_float3(Q[0][l][k] + 0.5f*Qy[0][j][i],
|
||||||
Q[2][l][k] + 0.5f*Qy[2][j][i],
|
Q[2][l][k] + 0.5f*Qy[2][j][i],
|
||||||
Q[1][l][k] + 0.5f*Qy[1][j][i]);
|
Q[1][l][k] + 0.5f*Qy[1][j][i]);
|
||||||
|
|
||||||
@ -131,7 +135,7 @@ void computeFluxG(__local float Q[3][block_height+4][block_width+4],
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
__kernel void swe_2D(
|
__global__ void HLL2Kernel(
|
||||||
int nx_, int ny_,
|
int nx_, int ny_,
|
||||||
float dx_, float dy_, float dt_,
|
float dx_, float dy_, float dt_,
|
||||||
float g_,
|
float g_,
|
||||||
@ -141,19 +145,19 @@ __kernel void swe_2D(
|
|||||||
int step_,
|
int step_,
|
||||||
|
|
||||||
//Input h^n
|
//Input h^n
|
||||||
__global float* h0_ptr_, int h0_pitch_,
|
float* h0_ptr_, int h0_pitch_,
|
||||||
__global float* hu0_ptr_, int hu0_pitch_,
|
float* hu0_ptr_, int hu0_pitch_,
|
||||||
__global float* hv0_ptr_, int hv0_pitch_,
|
float* hv0_ptr_, int hv0_pitch_,
|
||||||
|
|
||||||
//Output h^{n+1}
|
//Output h^{n+1}
|
||||||
__global float* h1_ptr_, int h1_pitch_,
|
float* h1_ptr_, int h1_pitch_,
|
||||||
__global float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
__global float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__local float Q[3][block_height+4][block_width+4];
|
__shared__ float Q[3][block_height+4][block_width+4];
|
||||||
__local float Qx[3][block_height+2][block_width+2];
|
__shared__ float Qx[3][block_height+2][block_width+2];
|
||||||
__local float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][block_height+1][block_width+1];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -163,55 +167,55 @@ __kernel void swe_2D(
|
|||||||
hu0_ptr_, hu0_pitch_,
|
hu0_ptr_, hu0_pitch_,
|
||||||
hv0_ptr_, hv0_pitch_,
|
hv0_ptr_, hv0_pitch_,
|
||||||
Q, nx_, ny_);
|
Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Step 0 => evolve x first, then y
|
//Step 0 => evolve x first, then y
|
||||||
if (step_ == 0) {
|
if (step_ == 0) {
|
||||||
//Compute fluxes along the x axis and evolve
|
//Compute fluxes along the x axis and evolve
|
||||||
minmodSlopeX(Q, Qx, theta_);
|
minmodSlopeX(Q, Qx, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute fluxes along the y axis and evolve
|
//Compute fluxes along the y axis and evolve
|
||||||
minmodSlopeY(Q, Qx, theta_);
|
minmodSlopeY(Q, Qx, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
}
|
}
|
||||||
//Step 1 => evolve y first, then x
|
//Step 1 => evolve y first, then x
|
||||||
else {
|
else {
|
||||||
//Compute fluxes along the y axis and evolve
|
//Compute fluxes along the y axis and evolve
|
||||||
minmodSlopeY(Q, Qx, theta_);
|
minmodSlopeY(Q, Qx, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute fluxes along the x axis and evolve
|
//Compute fluxes along the x axis and evolve
|
||||||
minmodSlopeX(Q, Qx, theta_);
|
minmodSlopeX(Q, Qx, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -19,7 +19,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
#include "common.cu"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -28,20 +28,22 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
/**
|
/**
|
||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxF(__local float Q[3][block_height+2][block_width+2],
|
__device__
|
||||||
__local float F[3][block_height+1][block_width+1],
|
void computeFluxF(float Q[3][block_height+2][block_width+2],
|
||||||
|
float F[3][block_height+1][block_width+1],
|
||||||
const float g_) {
|
const float g_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
{
|
||||||
|
const int j=ty;
|
||||||
const int l = j + 1; //Skip ghost cells
|
const int l = j + 1; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+1; i+=block_width) {
|
||||||
const int k = i;
|
const int k = i;
|
||||||
|
|
||||||
const float3 Q_l = (float3)(Q[0][l][k ], Q[1][l][k ], Q[2][l][k ]);
|
const float3 Q_l = make_float3(Q[0][l][k ], Q[1][l][k ], Q[2][l][k ]);
|
||||||
const float3 Q_r = (float3)(Q[0][l][k+1], Q[1][l][k+1], Q[2][l][k+1]);
|
const float3 Q_r = make_float3(Q[0][l][k+1], Q[1][l][k+1], Q[2][l][k+1]);
|
||||||
|
|
||||||
const float3 flux = HLL_flux(Q_l, Q_r, g_);
|
const float3 flux = HLL_flux(Q_l, Q_r, g_);
|
||||||
|
|
||||||
@ -58,23 +60,25 @@ void computeFluxF(__local float Q[3][block_height+2][block_width+2],
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the y axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxG(__local float Q[3][block_height+2][block_width+2],
|
__device__
|
||||||
__local float G[3][block_height+1][block_width+1],
|
void computeFluxG(float Q[3][block_height+2][block_width+2],
|
||||||
|
float G[3][block_height+1][block_width+1],
|
||||||
const float g_) {
|
const float g_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+1; j+=block_height) {
|
||||||
const int l = j;
|
const int l = j;
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
{
|
||||||
|
const int i=tx;
|
||||||
const int k = i + 1; //Skip ghost cells
|
const int k = i + 1; //Skip ghost cells
|
||||||
|
|
||||||
//NOte that hu and hv are swapped ("transposing" the domain)!
|
//NOte that hu and hv are swapped ("transposing" the domain)!
|
||||||
const float3 Q_l = (float3)(Q[0][l ][k], Q[2][l ][k], Q[1][l ][k]);
|
const float3 Q_l = make_float3(Q[0][l ][k], Q[2][l ][k], Q[1][l ][k]);
|
||||||
const float3 Q_r = (float3)(Q[0][l+1][k], Q[2][l+1][k], Q[1][l+1][k]);
|
const float3 Q_r = make_float3(Q[0][l+1][k], Q[2][l+1][k], Q[1][l+1][k]);
|
||||||
|
|
||||||
// Computed flux
|
// Computed flux
|
||||||
const float3 flux = HLL_flux(Q_l, Q_r, g_);
|
const float3 flux = HLL_flux(Q_l, Q_r, g_);
|
||||||
@ -100,23 +104,23 @@ void computeFluxG(__local float Q[3][block_height+2][block_width+2],
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
__kernel void swe_2D(
|
__global__ void HLLKernel(
|
||||||
int nx_, int ny_,
|
int nx_, int ny_,
|
||||||
float dx_, float dy_, float dt_,
|
float dx_, float dy_, float dt_,
|
||||||
float g_,
|
float g_,
|
||||||
|
|
||||||
//Input h^n
|
//Input h^n
|
||||||
__global float* h0_ptr_, int h0_pitch_,
|
float* h0_ptr_, int h0_pitch_,
|
||||||
__global float* hu0_ptr_, int hu0_pitch_,
|
float* hu0_ptr_, int hu0_pitch_,
|
||||||
__global float* hv0_ptr_, int hv0_pitch_,
|
float* hv0_ptr_, int hv0_pitch_,
|
||||||
|
|
||||||
//Output h^{n+1}
|
//Output h^{n+1}
|
||||||
__global float* h1_ptr_, int h1_pitch_,
|
float* h1_ptr_, int h1_pitch_,
|
||||||
__global float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
__global float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__local float Q[3][block_height+2][block_width+2];
|
__shared__ float Q[3][block_height+2][block_width+2];
|
||||||
__local float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][block_height+1][block_width+1];
|
||||||
|
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
@ -124,28 +128,30 @@ __kernel void swe_2D(
|
|||||||
hu0_ptr_, hu0_pitch_,
|
hu0_ptr_, hu0_pitch_,
|
||||||
hv0_ptr_, hv0_pitch_,
|
hv0_ptr_, hv0_pitch_,
|
||||||
Q, nx_, ny_);
|
Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
noFlowBoundary1(Q, nx_, ny_);
|
noFlowBoundary1(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute F flux
|
//Compute F flux
|
||||||
computeFluxF(Q, F, g_);
|
computeFluxF(Q, F, g_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveF1(Q, F, nx_, ny_, dx_, dt_);
|
evolveF1(Q, F, nx_, ny_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary1(Q, nx_, ny_);
|
noFlowBoundary1(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute G flux
|
//Compute G flux
|
||||||
computeFluxG(Q, F, g_);
|
computeFluxG(Q, F, g_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveG1(Q, F, nx_, ny_, dy_, dt_);
|
evolveG1(Q, F, nx_, ny_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
|
//Q[0][get_local_id(1) + 1][get_local_id(0) + 1] += 0.1;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Write to main memory for all internal cells
|
// Write to main memory for all internal cells
|
@ -26,7 +26,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
from SWESimulators import Common
|
from SWESimulators import Common
|
||||||
|
|
||||||
|
|
||||||
@ -63,26 +67,25 @@ class KP07:
|
|||||||
wind_v0: Translation speed along y for moving cyclone (-0.5*u0)
|
wind_v0: Translation speed along y for moving cyclone (-0.5*u0)
|
||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
cl_ctx, \
|
context, \
|
||||||
h0, hu0, hv0, \
|
h0, hu0, hv0, \
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, f=0.0, r=0.0, \
|
g, theta=1.3, \
|
||||||
theta=1.3, use_rk2=True,
|
r=0.0, use_rk2=True,
|
||||||
wind_stress=Common.WindStressParams(), \
|
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
self.cl_ctx = cl_ctx
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
#Create an OpenCL command queue
|
|
||||||
self.cl_queue = cl.CommandQueue(self.cl_ctx)
|
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.kp07_kernel = Common.get_kernel(self.cl_ctx, "KP07_kernel.opencl", block_width, block_height)
|
self.kp07_module = context.get_kernel("KP07_kernel.cu", block_width, block_height)
|
||||||
|
self.kp07_kernel = self.kp07_module.get_function("KP07Kernel")
|
||||||
|
self.kp07_kernel.prepare("iiffffffiPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
ghost_cells_x = 2
|
ghost_cells_x = 2
|
||||||
ghost_cells_y = 2
|
ghost_cells_y = 2
|
||||||
self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
@ -93,26 +96,24 @@ class KP07:
|
|||||||
self.dy = np.float32(dy)
|
self.dy = np.float32(dy)
|
||||||
self.dt = np.float32(dt)
|
self.dt = np.float32(dt)
|
||||||
self.g = np.float32(g)
|
self.g = np.float32(g)
|
||||||
self.f = np.float32(f)
|
|
||||||
self.r = np.float32(r)
|
|
||||||
self.theta = np.float32(theta)
|
self.theta = np.float32(theta)
|
||||||
|
self.r = np.float32(r)
|
||||||
self.use_rk2 = use_rk2
|
self.use_rk2 = use_rk2
|
||||||
self.wind_stress = wind_stress
|
|
||||||
|
|
||||||
#Initialize time
|
#Initialize time
|
||||||
self.t = np.float32(0.0)
|
self.t = np.float32(0.0)
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
#Compute kernel launch parameters
|
||||||
self.local_size = (block_width, block_height)
|
self.local_size = (block_width, block_height, 1)
|
||||||
self.global_size = ( \
|
self.global_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Kurganov-Petrova"
|
return "Kurganov-Petrova 2007"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Function which steps n timesteps
|
Function which steps n timesteps
|
||||||
@ -127,64 +128,47 @@ class KP07:
|
|||||||
break
|
break
|
||||||
|
|
||||||
if (self.use_rk2):
|
if (self.use_rk2):
|
||||||
self.kp07_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
self.f, \
|
|
||||||
self.r, \
|
self.r, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch, \
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.wind_stress.type, \
|
|
||||||
self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc, \
|
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.wind_stress.x0, self.wind_stress.y0, \
|
|
||||||
self.wind_stress.u0, self.wind_stress.v0, \
|
|
||||||
self.t)
|
|
||||||
self.kp07_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
self.f, \
|
|
||||||
self.r, \
|
self.r, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch, \
|
self.data.hv1.data.gpudata, self.data.hv1.pitch, \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch)
|
||||||
self.wind_stress.type, \
|
|
||||||
self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc, \
|
|
||||||
self.wind_stress.x0, self.wind_stress.y0, \
|
|
||||||
self.wind_stress.u0, self.wind_stress.v0, \
|
|
||||||
self.t)
|
|
||||||
else:
|
else:
|
||||||
self.kp07_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
self.f, \
|
|
||||||
self.r, \
|
self.r, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch, \
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.wind_stress.type, \
|
|
||||||
self.wind_stress.tau0, self.wind_stress.rho, self.wind_stress.alpha, self.wind_stress.xm, self.wind_stress.Rc, \
|
|
||||||
self.wind_stress.x0, self.wind_stress.y0, \
|
|
||||||
self.wind_stress.u0, self.wind_stress.v0, \
|
|
||||||
self.t)
|
|
||||||
self.cl_data.swap()
|
self.cl_data.swap()
|
||||||
|
|
||||||
self.t += local_dt
|
self.t += local_dt
|
||||||
@ -196,5 +180,5 @@ class KP07:
|
|||||||
|
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.cl_data.download(self.cl_queue)
|
return self.data.download(self.stream)
|
||||||
|
|
||||||
|
@ -26,7 +26,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
from SWESimulators import Common
|
from SWESimulators import Common
|
||||||
|
|
||||||
|
|
||||||
@ -51,25 +55,25 @@ class KP07_dimsplit:
|
|||||||
g: Gravitational acceleration (9.81 m/s^2)
|
g: Gravitational acceleration (9.81 m/s^2)
|
||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
cl_ctx, \
|
context, \
|
||||||
h0, hu0, hv0, \
|
h0, hu0, hv0, \
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
theta=1.3, \
|
theta=1.3, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
self.cl_ctx = cl_ctx
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
#Create an OpenCL command queue
|
|
||||||
self.cl_queue = cl.CommandQueue(self.cl_ctx)
|
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.swe_kernel = Common.get_kernel(self.cl_ctx, "KP07_dimsplit_kernel.opencl", block_width, block_height)
|
self.kp07_dimsplit_module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height)
|
||||||
|
self.kp07_dimsplit_kernel = self.kp07_dimsplit_module.get_function("KP07DimsplitKernel")
|
||||||
|
self.kp07_dimsplit_kernel.prepare("iifffffiPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
ghost_cells_x = 2
|
ghost_cells_x = 2
|
||||||
ghost_cells_y = 2
|
ghost_cells_y = 2
|
||||||
self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
@ -86,15 +90,15 @@ class KP07_dimsplit:
|
|||||||
self.t = np.float32(0.0)
|
self.t = np.float32(0.0)
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
#Compute kernel launch parameters
|
||||||
self.local_size = (block_width, block_height)
|
self.local_size = (block_width, block_height, 1)
|
||||||
self.global_size = ( \
|
self.global_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Kurganov-Petrova dimensionally split"
|
return "Kurganov-Petrova 2007 dimensionally split"
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -113,34 +117,34 @@ class KP07_dimsplit:
|
|||||||
break
|
break
|
||||||
|
|
||||||
#Along X, then Y
|
#Along X, then Y
|
||||||
self.swe_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.kp07_dimsplit_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.cl_data.swap()
|
self.data.swap()
|
||||||
|
|
||||||
#Along Y, then X
|
#Along Y, then X
|
||||||
self.swe_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.kp07_dimsplit_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.cl_data.swap()
|
self.data.swap()
|
||||||
|
|
||||||
self.t += 2.0*local_dt
|
self.t += 2.0*local_dt
|
||||||
|
|
||||||
@ -151,5 +155,5 @@ class KP07_dimsplit:
|
|||||||
|
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.cl_data.download(self.cl_queue)
|
return self.data.download(self.stream)
|
||||||
|
|
||||||
|
@ -24,35 +24,36 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
#include "common.cu"
|
||||||
|
|
||||||
|
|
||||||
|
__device__
|
||||||
void computeFluxF(__local float Q[3][block_height+4][block_width+4],
|
void computeFluxF(float Q[3][block_height+4][block_width+4],
|
||||||
__local float Qx[3][block_height+2][block_width+2],
|
float Qx[3][block_height+2][block_width+2],
|
||||||
__local float F[3][block_height+1][block_width+1],
|
float F[3][block_height+1][block_width+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
{
|
||||||
|
int j=ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+1; i+=block_width) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
// Reconstruct point values of Q at the left and right hand side
|
// Reconstruct point values of Q at the left and right hand side
|
||||||
// of the cell for both the left (i) and right (i+1) cell
|
// of the cell for both the left (i) and right (i+1) cell
|
||||||
const float3 Q_rl = (float3)(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
|
const float3 Q_rl = make_float3(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
|
||||||
Q[1][l][k+1] - 0.5f*Qx[1][j][i+1],
|
Q[1][l][k+1] - 0.5f*Qx[1][j][i+1],
|
||||||
Q[2][l][k+1] - 0.5f*Qx[2][j][i+1]);
|
Q[2][l][k+1] - 0.5f*Qx[2][j][i+1]);
|
||||||
const float3 Q_rr = (float3)(Q[0][l][k+1] + 0.5f*Qx[0][j][i+1],
|
const float3 Q_rr = make_float3(Q[0][l][k+1] + 0.5f*Qx[0][j][i+1],
|
||||||
Q[1][l][k+1] + 0.5f*Qx[1][j][i+1],
|
Q[1][l][k+1] + 0.5f*Qx[1][j][i+1],
|
||||||
Q[2][l][k+1] + 0.5f*Qx[2][j][i+1]);
|
Q[2][l][k+1] + 0.5f*Qx[2][j][i+1]);
|
||||||
|
|
||||||
const float3 Q_ll = (float3)(Q[0][l][k] - 0.5f*Qx[0][j][i],
|
const float3 Q_ll = make_float3(Q[0][l][k] - 0.5f*Qx[0][j][i],
|
||||||
Q[1][l][k] - 0.5f*Qx[1][j][i],
|
Q[1][l][k] - 0.5f*Qx[1][j][i],
|
||||||
Q[2][l][k] - 0.5f*Qx[2][j][i]);
|
Q[2][l][k] - 0.5f*Qx[2][j][i]);
|
||||||
const float3 Q_lr = (float3)(Q[0][l][k] + 0.5f*Qx[0][j][i],
|
const float3 Q_lr = make_float3(Q[0][l][k] + 0.5f*Qx[0][j][i],
|
||||||
Q[1][l][k] + 0.5f*Qx[1][j][i],
|
Q[1][l][k] + 0.5f*Qx[1][j][i],
|
||||||
Q[2][l][k] + 0.5f*Qx[2][j][i]);
|
Q[2][l][k] + 0.5f*Qx[2][j][i]);
|
||||||
|
|
||||||
@ -71,32 +72,34 @@ void computeFluxF(__local float Q[3][block_height+4][block_width+4],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeFluxG(__local float Q[3][block_height+4][block_width+4],
|
__device__
|
||||||
__local float Qy[3][block_height+2][block_width+2],
|
void computeFluxG(float Q[3][block_height+4][block_width+4],
|
||||||
__local float G[3][block_height+1][block_width+1],
|
float Qy[3][block_height+2][block_width+2],
|
||||||
|
float G[3][block_height+1][block_width+1],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+1; j+=block_height) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
{
|
||||||
|
int i=tx;
|
||||||
const int k = i + 2; //Skip ghost cells
|
const int k = i + 2; //Skip ghost cells
|
||||||
// Reconstruct point values of Q at the left and right hand side
|
// Reconstruct point values of Q at the left and right hand side
|
||||||
// of the cell for both the left (i) and right (i+1) cell
|
// of the cell for both the left (i) and right (i+1) cell
|
||||||
//Note that hu and hv are swapped ("transposing" the domain)!
|
//Note that hu and hv are swapped ("transposing" the domain)!
|
||||||
const float3 Q_rl = (float3)(Q[0][l+1][k] - 0.5f*Qy[0][j+1][i],
|
const float3 Q_rl = make_float3(Q[0][l+1][k] - 0.5f*Qy[0][j+1][i],
|
||||||
Q[2][l+1][k] - 0.5f*Qy[2][j+1][i],
|
Q[2][l+1][k] - 0.5f*Qy[2][j+1][i],
|
||||||
Q[1][l+1][k] - 0.5f*Qy[1][j+1][i]);
|
Q[1][l+1][k] - 0.5f*Qy[1][j+1][i]);
|
||||||
const float3 Q_rr = (float3)(Q[0][l+1][k] + 0.5f*Qy[0][j+1][i],
|
const float3 Q_rr = make_float3(Q[0][l+1][k] + 0.5f*Qy[0][j+1][i],
|
||||||
Q[2][l+1][k] + 0.5f*Qy[2][j+1][i],
|
Q[2][l+1][k] + 0.5f*Qy[2][j+1][i],
|
||||||
Q[1][l+1][k] + 0.5f*Qy[1][j+1][i]);
|
Q[1][l+1][k] + 0.5f*Qy[1][j+1][i]);
|
||||||
|
|
||||||
const float3 Q_ll = (float3)(Q[0][l][k] - 0.5f*Qy[0][j][i],
|
const float3 Q_ll = make_float3(Q[0][l][k] - 0.5f*Qy[0][j][i],
|
||||||
Q[2][l][k] - 0.5f*Qy[2][j][i],
|
Q[2][l][k] - 0.5f*Qy[2][j][i],
|
||||||
Q[1][l][k] - 0.5f*Qy[1][j][i]);
|
Q[1][l][k] - 0.5f*Qy[1][j][i]);
|
||||||
const float3 Q_lr = (float3)(Q[0][l][k] + 0.5f*Qy[0][j][i],
|
const float3 Q_lr = make_float3(Q[0][l][k] + 0.5f*Qy[0][j][i],
|
||||||
Q[2][l][k] + 0.5f*Qy[2][j][i],
|
Q[2][l][k] + 0.5f*Qy[2][j][i],
|
||||||
Q[1][l][k] + 0.5f*Qy[1][j][i]);
|
Q[1][l][k] + 0.5f*Qy[1][j][i]);
|
||||||
|
|
||||||
@ -122,7 +125,7 @@ void computeFluxG(__local float Q[3][block_height+4][block_width+4],
|
|||||||
/**
|
/**
|
||||||
* This dimensionally split kernel computes the 2D numerical scheme; step_ selects the sweep order (0: x then y, otherwise: y then x)
|
* This dimensionally split kernel computes the 2D numerical scheme; step_ selects the sweep order (0: x then y, otherwise: y then x)
|
||||||
*/
|
*/
|
||||||
__kernel void swe_2D(
|
__global__ void KP07DimsplitKernel(
|
||||||
int nx_, int ny_,
|
int nx_, int ny_,
|
||||||
float dx_, float dy_, float dt_,
|
float dx_, float dy_, float dt_,
|
||||||
float g_,
|
float g_,
|
||||||
@ -132,20 +135,20 @@ __kernel void swe_2D(
|
|||||||
int step_,
|
int step_,
|
||||||
|
|
||||||
//Input h^n
|
//Input h^n
|
||||||
__global float* h0_ptr_, int h0_pitch_,
|
float* h0_ptr_, int h0_pitch_,
|
||||||
__global float* hu0_ptr_, int hu0_pitch_,
|
float* hu0_ptr_, int hu0_pitch_,
|
||||||
__global float* hv0_ptr_, int hv0_pitch_,
|
float* hv0_ptr_, int hv0_pitch_,
|
||||||
|
|
||||||
//Output h^{n+1}
|
//Output h^{n+1}
|
||||||
__global float* h1_ptr_, int h1_pitch_,
|
float* h1_ptr_, int h1_pitch_,
|
||||||
__global float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
__global float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
|
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__local float Q[3][block_height+4][block_width+4];
|
__shared__ float Q[3][block_height+4][block_width+4];
|
||||||
__local float Qx[3][block_height+2][block_width+2];
|
__shared__ float Qx[3][block_height+2][block_width+2];
|
||||||
__local float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][block_height+1][block_width+1];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -154,12 +157,12 @@ __kernel void swe_2D(
|
|||||||
hu0_ptr_, hu0_pitch_,
|
hu0_ptr_, hu0_pitch_,
|
||||||
hv0_ptr_, hv0_pitch_,
|
hv0_ptr_, hv0_pitch_,
|
||||||
Q, nx_, ny_);
|
Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Fix boundary conditions
|
//Fix boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -167,45 +170,45 @@ __kernel void swe_2D(
|
|||||||
if (step_ == 0) {
|
if (step_ == 0) {
|
||||||
//Compute fluxes along the x axis and evolve
|
//Compute fluxes along the x axis and evolve
|
||||||
minmodSlopeX(Q, Qx, theta_);
|
minmodSlopeX(Q, Qx, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute fluxes along the y axis and evolve
|
//Compute fluxes along the y axis and evolve
|
||||||
minmodSlopeY(Q, Qx, theta_);
|
minmodSlopeY(Q, Qx, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
}
|
}
|
||||||
//Step 1 => evolve y first, then x
|
//Step 1 => evolve y first, then x
|
||||||
else {
|
else {
|
||||||
//Compute fluxes along the y axis and evolve
|
//Compute fluxes along the y axis and evolve
|
||||||
minmodSlopeY(Q, Qx, theta_);
|
minmodSlopeY(Q, Qx, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
computeFluxG(Q, Qx, F, g_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute fluxes along the x axis and evolve
|
//Compute fluxes along the x axis and evolve
|
||||||
minmodSlopeX(Q, Qx, theta_);
|
minmodSlopeX(Q, Qx, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
computeFluxF(Q, Qx, F, g_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -24,27 +24,28 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
#include "common.cu"
|
||||||
|
|
||||||
|
|
||||||
|
__device__
|
||||||
void computeFluxF(__local float Q[3][block_height+4][block_width+4],
|
void computeFluxF(float Q[3][block_height+4][block_width+4],
|
||||||
__local float Qx[3][block_height+2][block_width+2],
|
float Qx[3][block_height+2][block_width+2],
|
||||||
__local float F[3][block_height+1][block_width+1],
|
float F[3][block_height+1][block_width+1],
|
||||||
const float g_) {
|
const float g_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
{
|
||||||
|
int j=ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+1; i+=block_width) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
const float3 Qp = (float3)(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
|
const float3 Qp = make_float3(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
|
||||||
Q[1][l][k+1] - 0.5f*Qx[1][j][i+1],
|
Q[1][l][k+1] - 0.5f*Qx[1][j][i+1],
|
||||||
Q[2][l][k+1] - 0.5f*Qx[2][j][i+1]);
|
Q[2][l][k+1] - 0.5f*Qx[2][j][i+1]);
|
||||||
const float3 Qm = (float3)(Q[0][l][k ] + 0.5f*Qx[0][j][i ],
|
const float3 Qm = make_float3(Q[0][l][k ] + 0.5f*Qx[0][j][i ],
|
||||||
Q[1][l][k ] + 0.5f*Qx[1][j][i ],
|
Q[1][l][k ] + 0.5f*Qx[1][j][i ],
|
||||||
Q[2][l][k ] + 0.5f*Qx[2][j][i ]);
|
Q[2][l][k ] + 0.5f*Qx[2][j][i ]);
|
||||||
|
|
||||||
@ -57,24 +58,26 @@ void computeFluxF(__local float Q[3][block_height+4][block_width+4],
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void computeFluxG(__local float Q[3][block_height+4][block_width+4],
|
__device__
|
||||||
__local float Qy[3][block_height+2][block_width+2],
|
void computeFluxG(float Q[3][block_height+4][block_width+4],
|
||||||
__local float G[3][block_height+1][block_width+1],
|
float Qy[3][block_height+2][block_width+2],
|
||||||
|
float G[3][block_height+1][block_width+1],
|
||||||
const float g_) {
|
const float g_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+1; j+=block_height) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
{
|
||||||
|
int i=tx;
|
||||||
const int k = i + 2; //Skip ghost cells
|
const int k = i + 2; //Skip ghost cells
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
// Note that we swap hu and hv
|
// Note that we swap hu and hv
|
||||||
const float3 Qp = (float3)(Q[0][l+1][k] - 0.5f*Qy[0][j+1][i],
|
const float3 Qp = make_float3(Q[0][l+1][k] - 0.5f*Qy[0][j+1][i],
|
||||||
Q[2][l+1][k] - 0.5f*Qy[2][j+1][i],
|
Q[2][l+1][k] - 0.5f*Qy[2][j+1][i],
|
||||||
Q[1][l+1][k] - 0.5f*Qy[1][j+1][i]);
|
Q[1][l+1][k] - 0.5f*Qy[1][j+1][i]);
|
||||||
const float3 Qm = (float3)(Q[0][l ][k] + 0.5f*Qy[0][j ][i],
|
const float3 Qm = make_float3(Q[0][l ][k] + 0.5f*Qy[0][j ][i],
|
||||||
Q[2][l ][k] + 0.5f*Qy[2][j ][i],
|
Q[2][l ][k] + 0.5f*Qy[2][j ][i],
|
||||||
Q[1][l ][k] + 0.5f*Qy[1][j ][i]);
|
Q[1][l ][k] + 0.5f*Qy[1][j ][i]);
|
||||||
|
|
||||||
@ -94,56 +97,44 @@ void computeFluxG(__local float Q[3][block_height+4][block_width+4],
|
|||||||
/**
|
/**
|
||||||
* This unsplit kernel computes the 2D numerical scheme with a TVD RK2 time integration scheme
|
* This unsplit kernel computes the 2D numerical scheme with a TVD RK2 time integration scheme
|
||||||
*/
|
*/
|
||||||
__kernel void swe_2D(
|
__global__ void KP07Kernel(
|
||||||
int nx_, int ny_,
|
int nx_, int ny_,
|
||||||
float dx_, float dy_, float dt_,
|
float dx_, float dy_, float dt_,
|
||||||
float g_,
|
float g_,
|
||||||
|
|
||||||
float theta_,
|
float theta_,
|
||||||
|
|
||||||
float f_, //< Coriolis coefficient
|
|
||||||
float r_, //< Bottom friction coefficient
|
float r_, //< Bottom friction coefficient
|
||||||
|
|
||||||
int step_,
|
int step_,
|
||||||
|
|
||||||
//Input h^n
|
//Input h^n
|
||||||
__global float* h0_ptr_, int h0_pitch_,
|
float* h0_ptr_, int h0_pitch_,
|
||||||
__global float* hu0_ptr_, int hu0_pitch_,
|
float* hu0_ptr_, int hu0_pitch_,
|
||||||
__global float* hv0_ptr_, int hv0_pitch_,
|
float* hv0_ptr_, int hv0_pitch_,
|
||||||
|
|
||||||
//Output h^{n+1}
|
//Output h^{n+1}
|
||||||
__global float* h1_ptr_, int h1_pitch_,
|
float* h1_ptr_, int h1_pitch_,
|
||||||
__global float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
__global float* hv1_ptr_, int hv1_pitch_,
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
|
|
||||||
//Wind stress parameters
|
|
||||||
int wind_stress_type_,
|
|
||||||
float tau0_, float rho_, float alpha_, float xm_, float Rc_,
|
|
||||||
float x0_, float y0_,
|
|
||||||
float u0_, float v0_,
|
|
||||||
float t_) {
|
|
||||||
|
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Index of block within domain
|
|
||||||
const int bx = get_local_size(0) * get_group_id(0);
|
|
||||||
const int by = get_local_size(1) * get_group_id(1);
|
|
||||||
|
|
||||||
//Index of cell within domain
|
//Index of cell within domain
|
||||||
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
||||||
const int tj = get_global_id(1) + 2;
|
const int tj = get_global_id(1) + 2;
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__local float Q[3][block_height+4][block_width+4];
|
__shared__ float Q[3][block_height+4][block_width+4];
|
||||||
|
|
||||||
//The following slightly wastes memory, but enables us to reuse the
|
//The following slightly wastes memory, but enables us to reuse the
|
||||||
//funcitons in common.opencl
|
//funcitons in common.opencl
|
||||||
__local float Qx[3][block_height+2][block_width+2];
|
__shared__ float Qx[3][block_height+2][block_width+2];
|
||||||
__local float Qy[3][block_height+2][block_width+2];
|
__shared__ float Qy[3][block_height+2][block_width+2];
|
||||||
__local float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][block_height+1][block_width+1];
|
||||||
__local float G[3][block_height+1][block_width+1];
|
__shared__ float G[3][block_height+1][block_width+1];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -152,24 +143,24 @@ __kernel void swe_2D(
|
|||||||
hu0_ptr_, hu0_pitch_,
|
hu0_ptr_, hu0_pitch_,
|
||||||
hv0_ptr_, hv0_pitch_,
|
hv0_ptr_, hv0_pitch_,
|
||||||
Q, nx_, ny_);
|
Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Fix boundary conditions
|
//Fix boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Reconstruct slopes along x and axis
|
//Reconstruct slopes along x and axis
|
||||||
minmodSlopeX(Q, Qx, theta_);
|
minmodSlopeX(Q, Qx, theta_);
|
||||||
minmodSlopeY(Q, Qy, theta_);
|
minmodSlopeY(Q, Qy, theta_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Compute fluxes along the x and y axis
|
//Compute fluxes along the x and y axis
|
||||||
computeFluxF(Q, Qx, F, g_);
|
computeFluxF(Q, Qx, F, g_);
|
||||||
computeFluxG(Q, Qy, G, g_);
|
computeFluxG(Q, Qy, G, g_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Sum fluxes and advance in time for all internal cells
|
//Sum fluxes and advance in time for all internal cells
|
||||||
@ -177,33 +168,16 @@ __kernel void swe_2D(
|
|||||||
const int i = tx + 2; //Skip local ghost cells, i.e., +2
|
const int i = tx + 2; //Skip local ghost cells, i.e., +2
|
||||||
const int j = ty + 2;
|
const int j = ty + 2;
|
||||||
|
|
||||||
const float X = windStressX(
|
|
||||||
wind_stress_type_,
|
|
||||||
dx_, dy_, dt_,
|
|
||||||
tau0_, rho_, alpha_, xm_, Rc_,
|
|
||||||
x0_, y0_,
|
|
||||||
u0_, v0_,
|
|
||||||
t_);
|
|
||||||
const float Y = windStressY(
|
|
||||||
wind_stress_type_,
|
|
||||||
dx_, dy_, dt_,
|
|
||||||
tau0_, rho_, alpha_, xm_, Rc_,
|
|
||||||
x0_, y0_,
|
|
||||||
u0_, v0_,
|
|
||||||
t_);
|
|
||||||
|
|
||||||
const float h1 = Q[0][j][i] + (F[0][ty][tx] - F[0][ty ][tx+1]) * dt_ / dx_
|
const float h1 = Q[0][j][i] + (F[0][ty][tx] - F[0][ty ][tx+1]) * dt_ / dx_
|
||||||
+ (G[0][ty][tx] - G[0][ty+1][tx ]) * dt_ / dy_;
|
+ (G[0][ty][tx] - G[0][ty+1][tx ]) * dt_ / dy_;
|
||||||
const float hu1 = Q[1][j][i] + (F[1][ty][tx] - F[1][ty ][tx+1]) * dt_ / dx_
|
const float hu1 = Q[1][j][i] + (F[1][ty][tx] - F[1][ty ][tx+1]) * dt_ / dx_
|
||||||
+ (G[1][ty][tx] - G[1][ty+1][tx ]) * dt_ / dy_
|
+ (G[1][ty][tx] - G[1][ty+1][tx ]) * dt_ / dy_;
|
||||||
+ dt_*X - dt_*f_*Q[2][j][i];
|
|
||||||
const float hv1 = Q[2][j][i] + (F[2][ty][tx] - F[2][ty ][tx+1]) * dt_ / dx_
|
const float hv1 = Q[2][j][i] + (F[2][ty][tx] - F[2][ty ][tx+1]) * dt_ / dx_
|
||||||
+ (G[2][ty][tx] - G[2][ty+1][tx ]) * dt_ / dy_
|
+ (G[2][ty][tx] - G[2][ty+1][tx ]) * dt_ / dy_;
|
||||||
+ dt_*Y + dt_*f_*Q[1][j][i];
|
|
||||||
|
|
||||||
__global float* const h_row = (__global float*) ((__global char*) h1_ptr_ + h1_pitch_*tj);
|
float* const h_row = (float*) ((char*) h1_ptr_ + h1_pitch_*tj);
|
||||||
__global float* const hu_row = (__global float*) ((__global char*) hu1_ptr_ + hu1_pitch_*tj);
|
float* const hu_row = (float*) ((char*) hu1_ptr_ + hu1_pitch_*tj);
|
||||||
__global float* const hv_row = (__global float*) ((__global char*) hv1_ptr_ + hv1_pitch_*tj);
|
float* const hv_row = (float*) ((char*) hv1_ptr_ + hv1_pitch_*tj);
|
||||||
|
|
||||||
const float C = 2.0f*r_*dt_/Q[0][j][i];
|
const float C = 2.0f*r_*dt_/Q[0][j][i];
|
||||||
|
|
@ -22,7 +22,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
from SWESimulators import Common
|
from SWESimulators import Common
|
||||||
|
|
||||||
|
|
||||||
@ -31,12 +35,8 @@ from SWESimulators import Common
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
Class that solves the SW equations using the Lax Friedrichs scheme
|
||||||
"""
|
"""
|
||||||
class LxF:
|
class LxF:
|
||||||
|
|
||||||
@ -53,24 +53,27 @@ class LxF:
|
|||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
cl_ctx, \
|
context, \
|
||||||
h0, hu0, hv0, \
|
h0, hu0, hv0, \
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
self.cl_ctx = cl_ctx
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
#Create an OpenCL command queue
|
|
||||||
self.cl_queue = cl.CommandQueue(self.cl_ctx)
|
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.lxf_kernel = Common.get_kernel(self.cl_ctx, "LxF_kernel.opencl", block_width, block_height)
|
self.lxf_module = context.get_kernel("LxF_kernel.cu", block_width, block_height)
|
||||||
|
self.lxf_kernel = self.lxf_module.get_function("LxFKernel")
|
||||||
|
self.lxf_kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
ghost_cells_x = 1
|
ghost_cells_x = 1
|
||||||
ghost_cells_y = 1
|
ghost_cells_y = 1
|
||||||
self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
ghost_cells_x, ghost_cells_y, \
|
||||||
|
h0, hu0, hv0)
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
@ -86,10 +89,10 @@ class LxF:
|
|||||||
self.t = np.float32(0.0)
|
self.t = np.float32(0.0)
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
#Compute kernel launch parameters
|
||||||
self.local_size = (block_width, block_height)
|
self.local_size = (block_width, block_height, 1)
|
||||||
self.global_size = ( \
|
self.global_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -109,20 +112,20 @@ class LxF:
|
|||||||
if (local_dt <= 0.0):
|
if (local_dt <= 0.0):
|
||||||
break
|
break
|
||||||
|
|
||||||
self.lxf_kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.lxf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
|
||||||
self.t += local_dt
|
self.t += local_dt
|
||||||
|
|
||||||
self.cl_data.swap()
|
self.data.swap()
|
||||||
|
|
||||||
return self.t
|
return self.t
|
||||||
|
|
||||||
@ -131,5 +134,5 @@ class LxF:
|
|||||||
|
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.cl_data.download(self.cl_queue)
|
return self.data.download(self.stream)
|
||||||
|
|
||||||
|
@ -19,29 +19,31 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
#include "common.cu"
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxF(__local float Q[3][block_height+2][block_width+2],
|
__device__
|
||||||
__local float F[3][block_height][block_width+1],
|
void computeFluxF(float Q[3][block_height+2][block_width+2],
|
||||||
|
float F[3][block_height][block_width+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
{
|
||||||
|
const int j=ty;
|
||||||
const int l = j + 1; //Skip ghost cells
|
const int l = j + 1; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+1; i+=block_width) {
|
||||||
const int k = i;
|
const int k = i;
|
||||||
|
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
const float3 Qp = (float3)(Q[0][l][k+1],
|
const float3 Qp = make_float3(Q[0][l][k+1],
|
||||||
Q[1][l][k+1],
|
Q[1][l][k+1],
|
||||||
Q[2][l][k+1]);
|
Q[2][l][k+1]);
|
||||||
const float3 Qm = (float3)(Q[0][l][k],
|
const float3 Qm = make_float3(Q[0][l][k],
|
||||||
Q[1][l][k],
|
Q[1][l][k],
|
||||||
Q[2][l][k]);
|
Q[2][l][k]);
|
||||||
|
|
||||||
@ -58,24 +60,26 @@ void computeFluxF(__local float Q[3][block_height+2][block_width+2],
|
|||||||
/**
|
/**
|
||||||
* Computes the flux along the y axis for all faces
|
* Computes the flux along the y axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxG(__local float Q[3][block_height+2][block_width+2],
|
__device__
|
||||||
__local float G[3][block_height+1][block_width],
|
void computeFluxG(float Q[3][block_height+2][block_width+2],
|
||||||
|
float G[3][block_height+1][block_width],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+1; j+=block_height) {
|
||||||
const int l = j;
|
const int l = j;
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
{
|
||||||
|
const int i=tx;
|
||||||
const int k = i + 1; //Skip ghost cells
|
const int k = i + 1; //Skip ghost cells
|
||||||
|
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
// Note that we swap hu and hv
|
// Note that we swap hu and hv
|
||||||
const float3 Qp = (float3)(Q[0][l+1][k],
|
const float3 Qp = make_float3(Q[0][l+1][k],
|
||||||
Q[2][l+1][k],
|
Q[2][l+1][k],
|
||||||
Q[1][l+1][k]);
|
Q[1][l+1][k]);
|
||||||
const float3 Qm = (float3)(Q[0][l][k],
|
const float3 Qm = make_float3(Q[0][l][k],
|
||||||
Q[2][l][k],
|
Q[2][l][k],
|
||||||
Q[1][l][k]);
|
Q[1][l][k]);
|
||||||
|
|
||||||
@ -90,45 +94,45 @@ void computeFluxG(__local float Q[3][block_height+2][block_width+2],
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
__kernel void swe_2D(
|
__global__ void LxFKernel(
|
||||||
int nx_, int ny_,
|
int nx_, int ny_,
|
||||||
float dx_, float dy_, float dt_,
|
float dx_, float dy_, float dt_,
|
||||||
float g_,
|
float g_,
|
||||||
|
|
||||||
//Input h^n
|
//Input h^n
|
||||||
__global float* h0_ptr_, int h0_pitch_,
|
float* h0_ptr_, int h0_pitch_,
|
||||||
__global float* hu0_ptr_, int hu0_pitch_,
|
float* hu0_ptr_, int hu0_pitch_,
|
||||||
__global float* hv0_ptr_, int hv0_pitch_,
|
float* hv0_ptr_, int hv0_pitch_,
|
||||||
|
|
||||||
//Output h^{n+1}
|
//Output h^{n+1}
|
||||||
__global float* h1_ptr_, int h1_pitch_,
|
float* h1_ptr_, int h1_pitch_,
|
||||||
__global float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
__global float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
|
|
||||||
//Index of cell within domain
|
//Index of cell within domain
|
||||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
||||||
const int tj = get_global_id(1) + 1;
|
const int tj = get_global_id(1) + 1;
|
||||||
|
|
||||||
__local float Q[3][block_height+2][block_width+2];
|
__shared__ float Q[3][block_height+2][block_width+2];
|
||||||
__local float F[3][block_height][block_width+1];
|
__shared__ float F[3][block_height][block_width+1];
|
||||||
__local float G[3][block_height+1][block_width];
|
__shared__ float G[3][block_height+1][block_width];
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
readBlock1(h0_ptr_, h0_pitch_,
|
readBlock1(h0_ptr_, h0_pitch_,
|
||||||
hu0_ptr_, hu0_pitch_,
|
hu0_ptr_, hu0_pitch_,
|
||||||
hv0_ptr_, hv0_pitch_,
|
hv0_ptr_, hv0_pitch_,
|
||||||
Q, nx_, ny_);
|
Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary1(Q, nx_, ny_);
|
noFlowBoundary1(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Compute fluxes along the x and y axis
|
//Compute fluxes along the x and y axis
|
||||||
computeFluxF(Q, F, g_, dx_, dt_);
|
computeFluxF(Q, F, g_, dx_, dt_);
|
||||||
computeFluxG(Q, G, g_, dy_, dt_);
|
computeFluxG(Q, G, g_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Evolve for all internal cells
|
//Evolve for all internal cells
|
||||||
@ -147,9 +151,9 @@ __kernel void swe_2D(
|
|||||||
const float hv1 = Q[2][j][i] + (F[2][ty][tx] - F[2][ty ][tx+1]) * dt_ / dx_
|
const float hv1 = Q[2][j][i] + (F[2][ty][tx] - F[2][ty ][tx+1]) * dt_ / dx_
|
||||||
+ (G[2][ty][tx] - G[2][ty+1][tx ]) * dt_ / dy_;
|
+ (G[2][ty][tx] - G[2][ty+1][tx ]) * dt_ / dy_;
|
||||||
|
|
||||||
__global float* const h_row = (__global float*) ((__global char*) h1_ptr_ + h1_pitch_*tj);
|
float* const h_row = (float*) ((char*) h1_ptr_ + h1_pitch_*tj);
|
||||||
__global float* const hu_row = (__global float*) ((__global char*) hu1_ptr_ + hu1_pitch_*tj);
|
float* const hu_row = (float*) ((char*) hu1_ptr_ + hu1_pitch_*tj);
|
||||||
__global float* const hv_row = (__global float*) ((__global char*) hv1_ptr_ + hv1_pitch_*tj);
|
float* const hv_row = (float*) ((char*) hv1_ptr_ + hv1_pitch_*tj);
|
||||||
|
|
||||||
h_row[ti] = h1;
|
h_row[ti] = h1;
|
||||||
hu_row[ti] = hu1;
|
hu_row[ti] = hu1;
|
@ -22,7 +22,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pyopencl as cl #OpenCL in Python
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
from SWESimulators import Common
|
from SWESimulators import Common
|
||||||
|
|
||||||
|
|
||||||
@ -47,24 +51,24 @@ class WAF:
|
|||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
cl_ctx, \
|
context, \
|
||||||
h0, hu0, hv0, \
|
h0, hu0, hv0, \
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
self.cl_ctx = cl_ctx
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
#Create an OpenCL command queue
|
|
||||||
self.cl_queue = cl.CommandQueue(self.cl_ctx)
|
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.kernel = Common.get_kernel(self.cl_ctx, "WAF_kernel.opencl", block_width, block_height)
|
self.waf_module = context.get_kernel("WAF_kernel.cu", block_width, block_height)
|
||||||
|
self.waf_kernel = self.waf_module.get_function("WAFKernel")
|
||||||
|
self.waf_kernel.prepare("iiffffiPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
ghost_cells_x = 2
|
ghost_cells_x = 2
|
||||||
ghost_cells_y = 2
|
ghost_cells_y = 2
|
||||||
self.cl_data = Common.SWEDataArkawaA(self.cl_ctx, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
@ -80,14 +84,16 @@ class WAF:
|
|||||||
self.t = np.float32(0.0)
|
self.t = np.float32(0.0)
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
#Compute kernel launch parameters
|
||||||
self.local_size = (block_width, block_height)
|
self.local_size = (block_width, block_height, 1)
|
||||||
self.global_size = ( \
|
self.global_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0])) * self.local_size[0]), \
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1])) * self.local_size[1]) \
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "Weighted average flux"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Function which steps n timesteps
|
Function which steps n timesteps
|
||||||
@ -104,32 +110,30 @@ class WAF:
|
|||||||
break
|
break
|
||||||
|
|
||||||
#Along X, then Y
|
#Along X, then Y
|
||||||
self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.waf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.cl_data.swap()
|
|
||||||
|
|
||||||
#Along Y, then X
|
#Along Y, then X
|
||||||
self.kernel.swe_2D(self.cl_queue, self.global_size, self.local_size, \
|
self.waf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, local_dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
self.cl_data.h0.data, self.cl_data.h0.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.cl_data.hu0.data, self.cl_data.hu0.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.cl_data.hv0.data, self.cl_data.hv0.pitch, \
|
self.data.hv1.data.gpudata, self.data.hv1.pitch, \
|
||||||
self.cl_data.h1.data, self.cl_data.h1.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.cl_data.hu1.data, self.cl_data.hu1.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.cl_data.hv1.data, self.cl_data.hv1.pitch)
|
self.data.hv0.data.gpudata, self.data.hv0.pitch)
|
||||||
self.cl_data.swap()
|
|
||||||
|
|
||||||
self.t += local_dt
|
self.t += local_dt
|
||||||
|
|
||||||
@ -140,5 +144,5 @@ class WAF:
|
|||||||
|
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.cl_data.download(self.cl_queue)
|
return self.data.download(self.stream)
|
||||||
|
|
||||||
|
@ -24,30 +24,32 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
#include "common.opencl"
|
#include "common.cu"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxF(__local float Q[3][block_height+4][block_width+4],
|
__device__
|
||||||
__local float F[3][block_height+1][block_width+1],
|
void computeFluxF(float Q[3][block_height+4][block_width+4],
|
||||||
|
float F[3][block_height+1][block_width+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
{
|
||||||
|
int j=ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+1; i+=block_width) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
|
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
const float3 Ql2 = (float3)(Q[0][l][k-1], Q[1][l][k-1], Q[2][l][k-1]);
|
const float3 Ql2 = make_float3(Q[0][l][k-1], Q[1][l][k-1], Q[2][l][k-1]);
|
||||||
const float3 Ql1 = (float3)(Q[0][l][k ], Q[1][l][k ], Q[2][l][k ]);
|
const float3 Ql1 = make_float3(Q[0][l][k ], Q[1][l][k ], Q[2][l][k ]);
|
||||||
const float3 Qr1 = (float3)(Q[0][l][k+1], Q[1][l][k+1], Q[2][l][k+1]);
|
const float3 Qr1 = make_float3(Q[0][l][k+1], Q[1][l][k+1], Q[2][l][k+1]);
|
||||||
const float3 Qr2 = (float3)(Q[0][l][k+2], Q[1][l][k+2], Q[2][l][k+2]);
|
const float3 Qr2 = make_float3(Q[0][l][k+2], Q[1][l][k+2], Q[2][l][k+2]);
|
||||||
|
|
||||||
// Computed flux
|
// Computed flux
|
||||||
const float3 flux = WAF_1D_flux(Ql2, Ql1, Qr1, Qr2, g_, dx_, dt_);
|
const float3 flux = WAF_1D_flux(Ql2, Ql1, Qr1, Qr2, g_, dx_, dt_);
|
||||||
@ -68,24 +70,26 @@ void computeFluxF(__local float Q[3][block_height+4][block_width+4],
|
|||||||
/**
|
/**
|
||||||
* Computes the flux along the y axis for all faces
|
* Computes the flux along the y axis for all faces
|
||||||
*/
|
*/
|
||||||
void computeFluxG(__local float Q[3][block_height+4][block_width+4],
|
__device__
|
||||||
__local float G[3][block_height+1][block_width+1],
|
void computeFluxG(float Q[3][block_height+4][block_width+4],
|
||||||
|
float G[3][block_height+1][block_width+1],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Compute fluxes along the y axis
|
//Compute fluxes along the y axis
|
||||||
for (int j=ty; j<block_height+1; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+1; j+=block_height) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
{
|
||||||
|
int i=tx;
|
||||||
const int k = i + 2; //Skip ghost cells
|
const int k = i + 2; //Skip ghost cells
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
// Note that we swap hu and hv
|
// Note that we swap hu and hv
|
||||||
const float3 Ql2 = (float3)(Q[0][l-1][k], Q[2][l-1][k], Q[1][l-1][k]);
|
const float3 Ql2 = make_float3(Q[0][l-1][k], Q[2][l-1][k], Q[1][l-1][k]);
|
||||||
const float3 Ql1 = (float3)(Q[0][l ][k], Q[2][l ][k], Q[1][l ][k]);
|
const float3 Ql1 = make_float3(Q[0][l ][k], Q[2][l ][k], Q[1][l ][k]);
|
||||||
const float3 Qr1 = (float3)(Q[0][l+1][k], Q[2][l+1][k], Q[1][l+1][k]);
|
const float3 Qr1 = make_float3(Q[0][l+1][k], Q[2][l+1][k], Q[1][l+1][k]);
|
||||||
const float3 Qr2 = (float3)(Q[0][l+2][k], Q[2][l+2][k], Q[1][l+2][k]);
|
const float3 Qr2 = make_float3(Q[0][l+2][k], Q[2][l+2][k], Q[1][l+2][k]);
|
||||||
|
|
||||||
// Computed flux
|
// Computed flux
|
||||||
// Note that we swap back
|
// Note that we swap back
|
||||||
@ -110,23 +114,23 @@ void computeFluxG(__local float Q[3][block_height+4][block_width+4],
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
__kernel void swe_2D(
|
__global__ void WAFKernel(
|
||||||
int nx_, int ny_,
|
int nx_, int ny_,
|
||||||
float dx_, float dy_, float dt_,
|
float dx_, float dy_, float dt_,
|
||||||
float g_, int step_,
|
float g_, int step_,
|
||||||
|
|
||||||
//Input h^n
|
//Input h^n
|
||||||
__global float* h0_ptr_, int h0_pitch_,
|
float* h0_ptr_, int h0_pitch_,
|
||||||
__global float* hu0_ptr_, int hu0_pitch_,
|
float* hu0_ptr_, int hu0_pitch_,
|
||||||
__global float* hv0_ptr_, int hv0_pitch_,
|
float* hv0_ptr_, int hv0_pitch_,
|
||||||
|
|
||||||
//Output h^{n+1}
|
//Output h^{n+1}
|
||||||
__global float* h1_ptr_, int h1_pitch_,
|
float* h1_ptr_, int h1_pitch_,
|
||||||
__global float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
__global float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__local float Q[3][block_height+4][block_width+4];
|
__shared__ float Q[3][block_height+4][block_width+4];
|
||||||
__local float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][block_height+1][block_width+1];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -135,12 +139,12 @@ __kernel void swe_2D(
|
|||||||
hu0_ptr_, hu0_pitch_,
|
hu0_ptr_, hu0_pitch_,
|
||||||
hv0_ptr_, hv0_pitch_,
|
hv0_ptr_, hv0_pitch_,
|
||||||
Q, nx_, ny_);
|
Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
//Set boundary conditions
|
//Set boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -148,37 +152,37 @@ __kernel void swe_2D(
|
|||||||
if (step_ == 0) {
|
if (step_ == 0) {
|
||||||
//Compute fluxes along the x axis and evolve
|
//Compute fluxes along the x axis and evolve
|
||||||
computeFluxF(Q, F, g_, dx_, dt_);
|
computeFluxF(Q, F, g_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Fix boundary conditions
|
//Fix boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute fluxes along the y axis and evolve
|
//Compute fluxes along the y axis and evolve
|
||||||
computeFluxG(Q, F, g_, dy_, dt_);
|
computeFluxG(Q, F, g_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
}
|
}
|
||||||
//Step 1 => evolve y first, then x
|
//Step 1 => evolve y first, then x
|
||||||
else {
|
else {
|
||||||
//Compute fluxes along the y axis and evolve
|
//Compute fluxes along the y axis and evolve
|
||||||
computeFluxG(Q, F, g_, dy_, dt_);
|
computeFluxG(Q, F, g_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
evolveG2(Q, F, nx_, ny_, dy_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Fix boundary conditions
|
//Fix boundary conditions
|
||||||
noFlowBoundary2(Q, nx_, ny_);
|
noFlowBoundary2(Q, nx_, ny_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
|
|
||||||
//Compute fluxes along the x axis and evolve
|
//Compute fluxes along the x axis and evolve
|
||||||
computeFluxF(Q, F, g_, dx_, dt_);
|
computeFluxF(Q, F, g_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
evolveF2(Q, F, nx_, ny_, dx_, dt_);
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
__syncthreads();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -22,32 +22,97 @@ You should have received a copy of the GNU General Public License
|
|||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Location of thread in block
|
||||||
|
*/
|
||||||
|
inline __device__ int get_local_id(int dim) {
|
||||||
|
switch(dim) {
|
||||||
|
case 0: return threadIdx.x;
|
||||||
|
case 1: return threadIdx.y;
|
||||||
|
case 2: return threadIdx.z;
|
||||||
|
default: return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get block index
|
||||||
|
*/
|
||||||
|
__device__ int get_group_id(int dim) {
|
||||||
|
switch(dim) {
|
||||||
|
case 0: return blockIdx.x;
|
||||||
|
case 1: return blockIdx.y;
|
||||||
|
case 2: return blockIdx.z;
|
||||||
|
default: return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Location of thread in global domain
|
||||||
|
*/
|
||||||
|
__device__ int get_global_id(int dim) {
|
||||||
|
switch(dim) {
|
||||||
|
case 0: return blockDim.x*blockIdx.x + threadIdx.x;
|
||||||
|
case 1: return blockDim.y*blockIdx.y + threadIdx.y;
|
||||||
|
case 2: return blockDim.z*blockIdx.z + threadIdx.z;
|
||||||
|
default: return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Float3 operators
|
||||||
|
*/
|
||||||
|
inline __device__ float3 operator*(const float a, const float3 b) {
|
||||||
|
return make_float3(a*b.x, a*b.y, a*b.z);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline __device__ float3 operator/(const float3 a, const float b) {
|
||||||
|
return make_float3(a.x/b, a.y/b, a.z/b);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline __device__ float3 operator-(const float3 a, const float3 b) {
|
||||||
|
return make_float3(a.x-b.x, a.y-b.y, a.z-b.z);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline __device__ float3 operator+(const float3 a, const float3 b) {
|
||||||
|
return make_float3(a.x+b.x, a.y+b.y, a.z+b.z);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline __device__ __host__ float clamp(const float f, const float a, const float b) {
|
||||||
|
return fmaxf(a, fminf(f, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a block of data with one ghost cell for the shallow water equations
|
* Reads a block of data with one ghost cell for the shallow water equations
|
||||||
*/
|
*/
|
||||||
void readBlock1(__global float* h_ptr_, int h_pitch_,
|
__device__ void readBlock1(float* h_ptr_, int h_pitch_,
|
||||||
__global float* hu_ptr_, int hu_pitch_,
|
float* hu_ptr_, int hu_pitch_,
|
||||||
__global float* hv_ptr_, int hv_pitch_,
|
float* hv_ptr_, int hv_pitch_,
|
||||||
__local float Q[3][block_height+2][block_width+2],
|
float Q[3][block_height+2][block_width+2],
|
||||||
const int nx_, const int ny_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Index of block within domain
|
//Index of block within domain
|
||||||
const int bx = get_local_size(0) * get_group_id(0);
|
const int bx = block_width * get_group_id(0);
|
||||||
const int by = get_local_size(1) * get_group_id(1);
|
const int by = block_height * get_group_id(1);
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+2; j+=block_height) {
|
||||||
const int l = clamp(by + j, 0, ny_+1); // Out of bounds
|
const int l = clamp(by + j, 0, ny_+1); // Out of bounds
|
||||||
|
|
||||||
//Compute the pointer to current row in the arrays
|
//Compute the pointer to current row in the arrays
|
||||||
__global float* const h_row = (__global float*) ((__global char*) h_ptr_ + h_pitch_*l);
|
float* const h_row = (float*) ((char*) h_ptr_ + h_pitch_*l);
|
||||||
__global float* const hu_row = (__global float*) ((__global char*) hu_ptr_ + hu_pitch_*l);
|
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l);
|
||||||
__global float* const hv_row = (__global float*) ((__global char*) hv_ptr_ + hv_pitch_*l);
|
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l);
|
||||||
|
|
||||||
for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+2; i+=block_width) {
|
||||||
const int k = clamp(bx + i, 0, nx_+1); // Out of bounds
|
const int k = clamp(bx + i, 0, nx_+1); // Out of bounds
|
||||||
|
|
||||||
Q[0][j][i] = h_row[k];
|
Q[0][j][i] = h_row[k];
|
||||||
@ -64,29 +129,29 @@ void readBlock1(__global float* h_ptr_, int h_pitch_,
|
|||||||
/**
|
/**
|
||||||
* Reads a block of data with two ghost cells for the shallow water equations
|
* Reads a block of data with two ghost cells for the shallow water equations
|
||||||
*/
|
*/
|
||||||
void readBlock2(__global float* h_ptr_, int h_pitch_,
|
__device__ void readBlock2(float* h_ptr_, int h_pitch_,
|
||||||
__global float* hu_ptr_, int hu_pitch_,
|
float* hu_ptr_, int hu_pitch_,
|
||||||
__global float* hv_ptr_, int hv_pitch_,
|
float* hv_ptr_, int hv_pitch_,
|
||||||
__local float Q[3][block_height+4][block_width+4],
|
float Q[3][block_height+4][block_width+4],
|
||||||
const int nx_, const int ny_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Index of block within domain
|
//Index of block within domain
|
||||||
const int bx = get_local_size(0) * get_group_id(0);
|
const int bx = block_width * get_group_id(0);
|
||||||
const int by = get_local_size(1) * get_group_id(1);
|
const int by = block_height * get_group_id(1);
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
for (int j=ty; j<block_height+4; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+4; j+=block_height) {
|
||||||
const int l = clamp(by + j, 0, ny_+3); // Out of bounds
|
const int l = clamp(by + j, 0, ny_+3); // Out of bounds
|
||||||
|
|
||||||
//Compute the pointer to current row in the arrays
|
//Compute the pointer to current row in the arrays
|
||||||
__global float* const h_row = (__global float*) ((__global char*) h_ptr_ + h_pitch_*l);
|
float* const h_row = (float*) ((char*) h_ptr_ + h_pitch_*l);
|
||||||
__global float* const hu_row = (__global float*) ((__global char*) hu_ptr_ + hu_pitch_*l);
|
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l);
|
||||||
__global float* const hv_row = (__global float*) ((__global char*) hv_ptr_ + hv_pitch_*l);
|
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l);
|
||||||
|
|
||||||
for (int i=tx; i<block_width+4; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+4; i+=block_width) {
|
||||||
const int k = clamp(bx + i, 0, nx_+3); // Out of bounds
|
const int k = clamp(bx + i, 0, nx_+3); // Out of bounds
|
||||||
|
|
||||||
Q[0][j][i] = h_row[k];
|
Q[0][j][i] = h_row[k];
|
||||||
@ -102,10 +167,10 @@ void readBlock2(__global float* h_ptr_, int h_pitch_,
|
|||||||
/**
|
/**
|
||||||
* Writes a block of data to global memory for the shallow water equations.
|
* Writes a block of data to global memory for the shallow water equations.
|
||||||
*/
|
*/
|
||||||
void writeBlock1(__global float* h_ptr_, int h_pitch_,
|
__device__ void writeBlock1(float* h_ptr_, int h_pitch_,
|
||||||
__global float* hu_ptr_, int hu_pitch_,
|
float* hu_ptr_, int hu_pitch_,
|
||||||
__global float* hv_ptr_, int hv_pitch_,
|
float* hv_ptr_, int hv_pitch_,
|
||||||
__local float Q[3][block_height+2][block_width+2],
|
float Q[3][block_height+2][block_width+2],
|
||||||
const int nx_, const int ny_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -120,9 +185,9 @@ void writeBlock1(__global float* h_ptr_, int h_pitch_,
|
|||||||
const int i = tx + 1; //Skip local ghost cells, i.e., +1
|
const int i = tx + 1; //Skip local ghost cells, i.e., +1
|
||||||
const int j = ty + 1;
|
const int j = ty + 1;
|
||||||
|
|
||||||
__global float* const h_row = (__global float*) ((__global char*) h_ptr_ + h_pitch_*tj);
|
float* const h_row = (float*) ((char*) h_ptr_ + h_pitch_*tj);
|
||||||
__global float* const hu_row = (__global float*) ((__global char*) hu_ptr_ + hu_pitch_*tj);
|
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*tj);
|
||||||
__global float* const hv_row = (__global float*) ((__global char*) hv_ptr_ + hv_pitch_*tj);
|
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*tj);
|
||||||
|
|
||||||
h_row[ti] = Q[0][j][i];
|
h_row[ti] = Q[0][j][i];
|
||||||
hu_row[ti] = Q[1][j][i];
|
hu_row[ti] = Q[1][j][i];
|
||||||
@ -137,10 +202,10 @@ void writeBlock1(__global float* h_ptr_, int h_pitch_,
|
|||||||
/**
|
/**
|
||||||
* Writes a block of data to global memory for the shallow water equations.
|
* Writes a block of data to global memory for the shallow water equations.
|
||||||
*/
|
*/
|
||||||
void writeBlock2(__global float* h_ptr_, int h_pitch_,
|
__device__ void writeBlock2(float* h_ptr_, int h_pitch_,
|
||||||
__global float* hu_ptr_, int hu_pitch_,
|
float* hu_ptr_, int hu_pitch_,
|
||||||
__global float* hv_ptr_, int hv_pitch_,
|
float* hv_ptr_, int hv_pitch_,
|
||||||
__local float Q[3][block_height+4][block_width+4],
|
float Q[3][block_height+4][block_width+4],
|
||||||
const int nx_, const int ny_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -155,9 +220,9 @@ void writeBlock2(__global float* h_ptr_, int h_pitch_,
|
|||||||
const int i = tx + 2; //Skip local ghost cells, i.e., +2
|
const int i = tx + 2; //Skip local ghost cells, i.e., +2
|
||||||
const int j = ty + 2;
|
const int j = ty + 2;
|
||||||
|
|
||||||
__global float* const h_row = (__global float*) ((__global char*) h_ptr_ + h_pitch_*tj);
|
float* const h_row = (float*) ((char*) h_ptr_ + h_pitch_*tj);
|
||||||
__global float* const hu_row = (__global float*) ((__global char*) hu_ptr_ + hu_pitch_*tj);
|
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*tj);
|
||||||
__global float* const hv_row = (__global float*) ((__global char*) hv_ptr_ + hv_pitch_*tj);
|
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*tj);
|
||||||
|
|
||||||
h_row[ti] = Q[0][j][i];
|
h_row[ti] = Q[0][j][i];
|
||||||
hu_row[ti] = Q[1][j][i];
|
hu_row[ti] = Q[1][j][i];
|
||||||
@ -174,7 +239,7 @@ void writeBlock2(__global float* h_ptr_, int h_pitch_,
|
|||||||
* No flow boundary conditions for the shallow water equations
|
* No flow boundary conditions for the shallow water equations
|
||||||
* with one ghost cell in each direction
|
* with one ghost cell in each direction
|
||||||
*/
|
*/
|
||||||
void noFlowBoundary1(__local float Q[3][block_height+2][block_width+2], const int nx_, const int ny_) {
|
__device__ void noFlowBoundary1(float Q[3][block_height+2][block_width+2], const int nx_, const int ny_) {
|
||||||
//Global index
|
//Global index
|
||||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
||||||
const int tj = get_global_id(1) + 1;
|
const int tj = get_global_id(1) + 1;
|
||||||
@ -218,7 +283,7 @@ void noFlowBoundary1(__local float Q[3][block_height+2][block_width+2], const in
|
|||||||
* No flow boundary conditions for the shallow water equations
|
* No flow boundary conditions for the shallow water equations
|
||||||
* with two ghost cells in each direction
|
* with two ghost cells in each direction
|
||||||
*/
|
*/
|
||||||
void noFlowBoundary2(__local float Q[3][block_height+4][block_width+4], const int nx_, const int ny_) {
|
__device__ void noFlowBoundary2(float Q[3][block_height+4][block_width+4], const int nx_, const int ny_) {
|
||||||
//Global index
|
//Global index
|
||||||
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
||||||
const int tj = get_global_id(1) + 2;
|
const int tj = get_global_id(1) + 2;
|
||||||
@ -276,8 +341,8 @@ void noFlowBoundary2(__local float Q[3][block_height+4][block_width+4], const in
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the x axis (dimensional splitting)
|
* Evolves the solution in time along the x axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
void evolveF1(__local float Q[3][block_height+2][block_width+2],
|
__device__ void evolveF1(float Q[3][block_height+2][block_width+2],
|
||||||
__local float F[3][block_height+1][block_width+1],
|
float F[3][block_height+1][block_width+1],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dx_, const float dt_) {
|
const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -306,8 +371,8 @@ void evolveF1(__local float Q[3][block_height+2][block_width+2],
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the x axis (dimensional splitting)
|
* Evolves the solution in time along the x axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
void evolveF2(__local float Q[3][block_height+4][block_width+4],
|
__device__ void evolveF2(float Q[3][block_height+4][block_width+4],
|
||||||
__local float F[3][block_height+1][block_width+1],
|
float F[3][block_height+1][block_width+1],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dx_, const float dt_) {
|
const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -336,8 +401,8 @@ void evolveF2(__local float Q[3][block_height+4][block_width+4],
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the y axis (dimensional splitting)
|
* Evolves the solution in time along the y axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
void evolveG1(__local float Q[3][block_height+2][block_width+2],
|
__device__ void evolveG1(float Q[3][block_height+2][block_width+2],
|
||||||
__local float G[3][block_height+1][block_width+1],
|
float G[3][block_height+1][block_width+1],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dy_, const float dt_) {
|
const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -367,8 +432,8 @@ void evolveG1(__local float Q[3][block_height+2][block_width+2],
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the y axis (dimensional splitting)
|
* Evolves the solution in time along the y axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
void evolveG2(__local float Q[3][block_height+4][block_width+4],
|
__device__ void evolveG2(float Q[3][block_height+4][block_width+4],
|
||||||
__local float G[3][block_height+1][block_width+1],
|
float G[3][block_height+1][block_width+1],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dy_, const float dt_) {
|
const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -402,7 +467,7 @@ void evolveG2(__local float Q[3][block_height+4][block_width+4],
|
|||||||
* Reconstructs a slope using the minmod limiter based on three
|
* Reconstructs a slope using the minmod limiter based on three
|
||||||
* consecutive values
|
* consecutive values
|
||||||
*/
|
*/
|
||||||
float minmodSlope(float left, float center, float right, float theta) {
|
__device__ float minmodSlope(float left, float center, float right, float theta) {
|
||||||
const float backward = (center - left) * theta;
|
const float backward = (center - left) * theta;
|
||||||
const float central = (right - left) * 0.5f;
|
const float central = (right - left) * 0.5f;
|
||||||
const float forward = (right - center) * theta;
|
const float forward = (right - center) * theta;
|
||||||
@ -420,17 +485,18 @@ float minmodSlope(float left, float center, float right, float theta) {
|
|||||||
/**
|
/**
|
||||||
* Reconstructs a minmod slope for a whole block along x
|
* Reconstructs a minmod slope for a whole block along x
|
||||||
*/
|
*/
|
||||||
void minmodSlopeX(__local float Q[3][block_height+4][block_width+4],
|
__device__ void minmodSlopeX(float Q[3][block_height+4][block_width+4],
|
||||||
__local float Qx[3][block_height+2][block_width+2],
|
float Qx[3][block_height+2][block_width+2],
|
||||||
const float theta_) {
|
const float theta_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Reconstruct slopes along x axis
|
//Reconstruct slopes along x axis
|
||||||
for (int j=ty; j<block_height; j+=get_local_size(1)) {
|
{
|
||||||
|
const int j = ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+2; i+=get_local_size(0)) {
|
for (int i=tx; i<block_width+2; i+=block_width) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
for (int p=0; p<3; ++p) {
|
for (int p=0; p<3; ++p) {
|
||||||
Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_);
|
Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_);
|
||||||
@ -443,16 +509,17 @@ void minmodSlopeX(__local float Q[3][block_height+4][block_width+4],
|
|||||||
/**
|
/**
|
||||||
* Reconstructs a minmod slope for a whole block along y
|
* Reconstructs a minmod slope for a whole block along y
|
||||||
*/
|
*/
|
||||||
void minmodSlopeY(__local float Q[3][block_height+4][block_width+4],
|
__device__ void minmodSlopeY(float Q[3][block_height+4][block_width+4],
|
||||||
__local float Qy[3][block_height+2][block_width+2],
|
float Qy[3][block_height+2][block_width+2],
|
||||||
const float theta_) {
|
const float theta_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+2; j+=get_local_size(1)) {
|
for (int j=ty; j<block_height+2; j+=block_height) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
for (int i=tx; i<block_width; i+=get_local_size(0)) {
|
{
|
||||||
|
const int i = tx;
|
||||||
const int k = i + 2; //Skip ghost cells
|
const int k = i + 2; //Skip ghost cells
|
||||||
for (int p=0; p<3; ++p) {
|
for (int p=0; p<3; ++p) {
|
||||||
Qy[p][j][i] = minmodSlope(Q[p][l-1][k], Q[p][l][k], Q[p][l+1][k], theta_);
|
Qy[p][j][i] = minmodSlope(Q[p][l-1][k], Q[p][l][k], Q[p][l+1][k], theta_);
|
||||||
@ -466,91 +533,10 @@ void minmodSlopeY(__local float Q[3][block_height+4][block_width+4],
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
float windStressX(int wind_stress_type_,
|
|
||||||
float dx_, float dy_, float dt_,
|
|
||||||
float tau0_, float rho_, float alpha_, float xm_, float Rc_,
|
|
||||||
float x0_, float y0_,
|
|
||||||
float u0_, float v0_,
|
|
||||||
float t_) {
|
|
||||||
|
|
||||||
float X = 0.0f;
|
|
||||||
|
|
||||||
switch (wind_stress_type_) {
|
|
||||||
case 0: //UNIFORM_ALONGSHORE
|
|
||||||
{
|
|
||||||
const float y = (get_global_id(1)+0.5f)*dy_;
|
|
||||||
X = tau0_/rho_ * exp(-alpha_*y);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 1: //BELL_SHAPED_ALONGSHORE
|
|
||||||
if (t_ <= 48.0f*3600.0f) {
|
|
||||||
const float a = alpha_*((get_global_id(0)+0.5f)*dx_-xm_);
|
|
||||||
const float aa = a*a;
|
|
||||||
const float y = (get_global_id(1)+0.5f)*dy_;
|
|
||||||
X = tau0_/rho_ * exp(-aa) * exp(-alpha_*y);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 2: //MOVING_CYCLONE
|
|
||||||
{
|
|
||||||
const float x = (get_global_id(0))*dx_;
|
|
||||||
const float y = (get_global_id(1)+0.5f)*dy_;
|
|
||||||
const float a = (x-x0_-u0_*(t_+dt_));
|
|
||||||
const float aa = a*a;
|
|
||||||
const float b = (y-y0_-v0_*(t_+dt_));
|
|
||||||
const float bb = b*b;
|
|
||||||
const float r = sqrt(aa+bb);
|
|
||||||
const float c = 1.0f - r/Rc_;
|
|
||||||
const float xi = c*c;
|
|
||||||
|
|
||||||
X = -(tau0_/rho_) * (b/Rc_) * exp(-0.5f*xi);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return X;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
__device__ float3 F_func(const float3 Q, const float g) {
|
||||||
|
|
||||||
|
|
||||||
float windStressY(int wind_stress_type_,
|
|
||||||
float dx_, float dy_, float dt_,
|
|
||||||
float tau0_, float rho_, float alpha_, float xm_, float Rc_,
|
|
||||||
float x0_, float y0_,
|
|
||||||
float u0_, float v0_,
|
|
||||||
float t_) {
|
|
||||||
float Y = 0.0f;
|
|
||||||
|
|
||||||
switch (wind_stress_type_) {
|
|
||||||
case 2: //MOVING_CYCLONE:
|
|
||||||
{
|
|
||||||
const float x = (get_global_id(0)+0.5f)*dx_;
|
|
||||||
const float y = (get_global_id(1))*dy_;
|
|
||||||
const float a = (x-x0_-u0_*(t_+dt_));
|
|
||||||
const float aa = a*a;
|
|
||||||
const float b = (y-y0_-v0_*(t_+dt_));
|
|
||||||
const float bb = b*b;
|
|
||||||
const float r = sqrt(aa+bb);
|
|
||||||
const float c = 1.0f - r/Rc_;
|
|
||||||
const float xi = c*c;
|
|
||||||
|
|
||||||
Y = (tau0_/rho_) * (a/Rc_) * exp(-0.5f*xi);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Y;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
float3 F_func(const float3 Q, const float g) {
|
|
||||||
float3 F;
|
float3 F;
|
||||||
|
|
||||||
F.x = Q.y; //hu
|
F.x = Q.y; //hu
|
||||||
@ -567,7 +553,7 @@ float3 F_func(const float3 Q, const float g) {
|
|||||||
/**
|
/**
|
||||||
* Central upwind flux function
|
* Central upwind flux function
|
||||||
*/
|
*/
|
||||||
float3 CentralUpwindFlux(const float3 Qm, float3 Qp, const float g) {
|
__device__ float3 CentralUpwindFlux(const float3 Qm, float3 Qp, const float g) {
|
||||||
const float3 Fp = F_func(Qp, g);
|
const float3 Fp = F_func(Qp, g);
|
||||||
const float up = Qp.y / Qp.x; // hu / h
|
const float up = Qp.y / Qp.x; // hu / h
|
||||||
const float cp = sqrt(g*Qp.x); // sqrt(g*h)
|
const float cp = sqrt(g*Qp.x); // sqrt(g*h)
|
||||||
@ -594,7 +580,7 @@ float3 CentralUpwindFlux(const float3 Qm, float3 Qp, const float g) {
|
|||||||
/**
|
/**
|
||||||
* Harten-Lax-van Leer with contact discontinuity (Toro 2001, p 180)
|
* Harten-Lax-van Leer with contact discontinuity (Toro 2001, p 180)
|
||||||
*/
|
*/
|
||||||
float3 HLL_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
__device__ float3 HLL_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
||||||
const float h_l = Q_l.x;
|
const float h_l = Q_l.x;
|
||||||
const float h_r = Q_r.x;
|
const float h_r = Q_r.x;
|
||||||
|
|
||||||
@ -646,7 +632,7 @@ float3 HLL_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
|||||||
/**
|
/**
|
||||||
* Harten-Lax-van Leer with contact discontinuity (Toro 2001, p 181)
|
* Harten-Lax-van Leer with contact discontinuity (Toro 2001, p 181)
|
||||||
*/
|
*/
|
||||||
float3 HLLC_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
__device__ float3 HLLC_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
||||||
const float h_l = Q_l.x;
|
const float h_l = Q_l.x;
|
||||||
const float h_r = Q_r.x;
|
const float h_r = Q_r.x;
|
||||||
|
|
||||||
@ -685,19 +671,19 @@ float3 HLLC_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
|||||||
//Or estimate flux in the "left star" region
|
//Or estimate flux in the "left star" region
|
||||||
else if (S_l <= 0.0f && 0.0f <=S_star) {
|
else if (S_l <= 0.0f && 0.0f <=S_star) {
|
||||||
const float v_l = Q_l.z / h_l;
|
const float v_l = Q_l.z / h_l;
|
||||||
const float3 Q_star_l = h_l * (S_l - u_l) / (S_l - S_star) * (float3)(1, S_star, v_l);
|
const float3 Q_star_l = h_l * (S_l - u_l) / (S_l - S_star) * make_float3(1, S_star, v_l);
|
||||||
const float3 flux = F_l + S_l*(Q_star_l - Q_l);
|
const float3 flux = F_l + S_l*(Q_star_l - Q_l);
|
||||||
return flux;
|
return flux;
|
||||||
}
|
}
|
||||||
//Or estimate flux in the "righ star" region
|
//Or estimate flux in the "righ star" region
|
||||||
else if (S_star <= 0.0f && 0.0f <=S_r) {
|
else if (S_star <= 0.0f && 0.0f <=S_r) {
|
||||||
const float v_r = Q_r.z / h_r;
|
const float v_r = Q_r.z / h_r;
|
||||||
const float3 Q_star_r = h_r * (S_r - u_r) / (S_r - S_star) * (float3)(1, S_star, v_r);
|
const float3 Q_star_r = h_r * (S_r - u_r) / (S_r - S_star) * make_float3(1, S_star, v_r);
|
||||||
const float3 flux = F_r + S_r*(Q_star_r - Q_r);
|
const float3 flux = F_r + S_r*(Q_star_r - Q_r);
|
||||||
return flux;
|
return flux;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return -99999.9f; //Something wrong here
|
return make_float3(-99999.9f, -99999.9f, -99999.9f); //Something wrong here
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -709,7 +695,7 @@ float3 HLLC_flux(const float3 Q_l, const float3 Q_r, const float g_) {
|
|||||||
* @param r_ the ratio of upwind change (see Toro 2001, p. 203/204)
|
* @param r_ the ratio of upwind change (see Toro 2001, p. 203/204)
|
||||||
* @param c_ the courant number for wave k, dt*S_k/dx
|
* @param c_ the courant number for wave k, dt*S_k/dx
|
||||||
*/
|
*/
|
||||||
float WAF_superbee(float r_, float c_) {
|
__device__ float WAF_superbee(float r_, float c_) {
|
||||||
// r <= 0.0
|
// r <= 0.0
|
||||||
if (r_ <= 0.0f) {
|
if (r_ <= 0.0f) {
|
||||||
return 1.0f;
|
return 1.0f;
|
||||||
@ -735,7 +721,7 @@ float WAF_superbee(float r_, float c_) {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
float WAF_albada(float r_, float c_) {
|
__device__ float WAF_albada(float r_, float c_) {
|
||||||
if (r_ <= 0.0f) {
|
if (r_ <= 0.0f) {
|
||||||
return 1.0f;
|
return 1.0f;
|
||||||
}
|
}
|
||||||
@ -744,32 +730,29 @@ float WAF_albada(float r_, float c_) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ float WAF_minmod(float r_, float c_) {
|
||||||
float WAF_minbee(float r_, float c_) {
|
return 1.0f - (1.0f - fabs(c_)) * fmax(0.0f, fmin(1.0f, r_));
|
||||||
if (r_ <= 0.0f) {
|
|
||||||
return 1.0f;
|
|
||||||
}
|
}
|
||||||
else if (r_ >= 0.0f && r_ <= 1.0f) {
|
|
||||||
|
__device__ float minmod(float r_) {
|
||||||
|
return fmax(0.0f, fmin(1.0f, r_));
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ float superbee(float r_) {
|
||||||
|
return fmax(0.0f, fmax(fmin(2.0f*r_, 1.0f), fmin(r_, 2.0f)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ float vanAlbada1(float r_) {
|
||||||
|
return (r_*r_ + r_) / (r_*r_ + 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ float vanLeer(float r_) {
|
||||||
|
return (r_ + fabs(r_)) / (1.0f + fabs(r_));
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ float limiterToWAFLimiter(float r_, float c_) {
|
||||||
return 1.0f - (1.0f - fabs(c_))*r_;
|
return 1.0f - (1.0f - fabs(c_))*r_;
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
return fabs(c_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
float WAF_minmod(float r_, float c_) {
|
|
||||||
if (r_ <= 0.0f) {
|
|
||||||
return fabs(c_);
|
|
||||||
}
|
|
||||||
else if (r_ <= 1.0f) {
|
|
||||||
return (1.0f - r_) * (1.0f - c_);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return 1.0f;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -780,7 +763,7 @@ float WAF_minmod(float r_, float c_) {
|
|||||||
* @param Q_r1 Q_{i+1}
|
* @param Q_r1 Q_{i+1}
|
||||||
* @param Q_r2 Q_{i+2}
|
* @param Q_r2 Q_{i+2}
|
||||||
*/
|
*/
|
||||||
float3 WAF_1D_flux(const float3 Q_l2, const float3 Q_l1, const float3 Q_r1, const float3 Q_r2, const float g_, const float dx_, const float dt_) {
|
__device__ float3 WAF_1D_flux(const float3 Q_l2, const float3 Q_l1, const float3 Q_r1, const float3 Q_r2, const float g_, const float dx_, const float dt_) {
|
||||||
const float h_l = Q_l1.x;
|
const float h_l = Q_l1.x;
|
||||||
const float h_r = Q_r1.x;
|
const float h_r = Q_r1.x;
|
||||||
|
|
||||||
@ -811,12 +794,12 @@ float3 WAF_1D_flux(const float3 Q_l2, const float3 Q_l1, const float3 Q_r1, cons
|
|||||||
const float q_r = (h_dag > h_r) ? q_r_tmp : 1.0f;
|
const float q_r = (h_dag > h_r) ? q_r_tmp : 1.0f;
|
||||||
|
|
||||||
// Compute wave speed estimates
|
// Compute wave speed estimates
|
||||||
const float S_l = u_l - c_l;//*q_l;
|
const float S_l = u_l - c_l*q_l; //FIXME: Right wave speed estimate?
|
||||||
const float S_r = u_r + c_r;//*q_r;
|
const float S_r = u_r + c_r*q_r;
|
||||||
const float S_star = ( S_l*h_r*(u_r - S_r) - S_r*h_l*(u_l - S_l) ) / ( h_r*(u_r - S_r) - h_l*(u_l - S_l) );
|
const float S_star = ( S_l*h_r*(u_r - S_r) - S_r*h_l*(u_l - S_l) ) / ( h_r*(u_r - S_r) - h_l*(u_l - S_l) );
|
||||||
|
|
||||||
const float3 Q_star_l = h_l * (S_l - u_l) / (S_l - S_star) * (float3)(1, S_star, v_l);
|
const float3 Q_star_l = h_l * (S_l - u_l) / (S_l - S_star) * make_float3(1, S_star, v_l);
|
||||||
const float3 Q_star_r = h_r * (S_r - u_r) / (S_r - S_star) * (float3)(1, S_star, v_r);
|
const float3 Q_star_r = h_r * (S_r - u_r) / (S_r - S_star) * make_float3(1, S_star, v_r);
|
||||||
|
|
||||||
// Estimate the fluxes in the four regions
|
// Estimate the fluxes in the four regions
|
||||||
const float3 F_1 = F_func(Q_l1, g_);
|
const float3 F_1 = F_func(Q_l1, g_);
|
||||||
@ -833,27 +816,40 @@ float3 WAF_1D_flux(const float3 Q_l2, const float3 Q_l1, const float3 Q_r1, cons
|
|||||||
const float c_3 = S_r * dt_ / dx_;
|
const float c_3 = S_r * dt_ / dx_;
|
||||||
|
|
||||||
// Compute the "upwind change" vectors for the i-3/2 and i+3/2 interfaces
|
// Compute the "upwind change" vectors for the i-3/2 and i+3/2 interfaces
|
||||||
const float rh_m = fmin(fmax( (h_l - h_l2) / (h_r - h_l), -1.0f ), 1.0f);
|
const float rh_m = (h_l - h_l2) / (h_r - h_l);
|
||||||
const float rh_p = fmin(fmax( (h_r2 - h_r) / (h_r - h_l), -1.0f ), 1.0f);
|
const float rh_p = (h_r2 - h_r) / (h_r - h_l);
|
||||||
|
|
||||||
const float rv_m = fmin(fmax( (v_l - v_l2) / (v_r - v_l), -1.0f ), 1.0f);
|
const float rv_m = (v_l - v_l2) / (v_r - v_l);
|
||||||
const float rv_p = fmin(fmax( (v_r2 - v_r) / (v_r - v_l), -1.0f ), 1.0f);
|
const float rv_p = (v_r2 - v_r) / (v_r - v_l);
|
||||||
|
|
||||||
// Compute the r parameters for the flux limiter
|
// Compute the r parameters for the flux limiter
|
||||||
const float rh_1 = (c_1 > 0.0f) ? rh_m : rh_p;
|
const float rh_1 = (c_1 > 0.0f) ? rh_m : rh_p;
|
||||||
const float rv_1 = (c_1 > 0.0f) ? rv_m : rv_p;
|
//const float rv_1 = (c_1 > 0.0f) ? rv_m : rv_p;
|
||||||
|
|
||||||
const float rh_2 = (c_2 > 0.0f) ? rh_m : rh_p;
|
//const float rh_2 = (c_2 > 0.0f) ? rh_m : rh_p;
|
||||||
const float rv_2 = (c_2 > 0.0f) ? rv_m : rv_p;
|
const float rv_2 = (c_2 > 0.0f) ? rv_m : rv_p;
|
||||||
|
|
||||||
const float rh_3 = (c_3 > 0.0f) ? rh_m : rh_p;
|
const float rh_3 = (c_3 > 0.0f) ? rh_m : rh_p;
|
||||||
const float rv_3 = (c_3 > 0.0f) ? rv_m : rv_p;
|
//const float rv_3 = (c_3 > 0.0f) ? rv_m : rv_p;
|
||||||
|
|
||||||
// Compute the limiter
|
// Compute the limiter
|
||||||
// We use h for the nonlinear waves, and v for the middle shear wave
|
// We use h for the nonlinear waves, and v for the middle shear wave
|
||||||
const float A_1 = c_1;//sign(c_1)*WAF_minbee(rh_1, c_1);
|
///**
|
||||||
const float A_2 = c_2;//sign(c_2)*WAF_minbee(rv_2, c_2); //Middle shear wave
|
const float A_1 = copysign(1.0f, c_1) * WAF_minmod(rh_1, c_1);
|
||||||
const float A_3 = c_3;//sign(c_3)*WAF_minbee(rh_3, c_3);
|
const float A_2 = copysign(1.0f, c_2) * WAF_minmod(rv_2, c_2); //Middle shear wave
|
||||||
|
const float A_3 = copysign(1.0f, c_3) * WAF_minmod(rh_3, c_3);
|
||||||
|
//*/
|
||||||
|
/**
|
||||||
|
//2nd order for smooth cases (unstable for shocks)
|
||||||
|
const float A_1 = c_1;
|
||||||
|
const float A_2 = c_2;
|
||||||
|
const float A_3 = c_3;
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
const float A_1 = sign(c_1) * limiterToWAFLimiter(minmod(rh_1), c_1);
|
||||||
|
const float A_2 = sign(c_2) * limiterToWAFLimiter(minmod(rv_2), c_2);
|
||||||
|
const float A_3 = sign(c_3) * limiterToWAFLimiter(minmod(rh_3), c_3);
|
||||||
|
*/
|
||||||
|
|
||||||
//Average the fluxes
|
//Average the fluxes
|
||||||
const float3 flux = 0.5f*( F_1 + F_4 )
|
const float3 flux = 0.5f*( F_1 + F_4 )
|
||||||
@ -899,11 +895,11 @@ float3 WAF_1D_flux(const float3 Q_l2, const float3 Q_l1, const float3 Q_r1, cons
|
|||||||
/**
|
/**
|
||||||
* Lax-Friedrichs flux (Toro 2001, p 163)
|
* Lax-Friedrichs flux (Toro 2001, p 163)
|
||||||
*/
|
*/
|
||||||
float3 LxF_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
__device__ float3 LxF_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||||
const float3 F_l = F_func(Q_l, g_);
|
const float3 F_l = F_func(Q_l, g_);
|
||||||
const float3 F_r = F_func(Q_r, g_);
|
const float3 F_r = F_func(Q_r, g_);
|
||||||
|
|
||||||
return 0.5f*(F_l + F_r) + (Q_l - Q_r) * dx_ / (2.0f*dt_);
|
return 0.5f*(F_l + F_r) + (dx_/(2.0f*dt_))*(Q_l - Q_r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -911,12 +907,12 @@ float3 LxF_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const flo
|
|||||||
/**
|
/**
|
||||||
* Lax-Friedrichs extended to 2D
|
* Lax-Friedrichs extended to 2D
|
||||||
*/
|
*/
|
||||||
float3 LxF_2D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
__device__ float3 LxF_2D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||||
const float3 F_l = F_func(Q_l, g_);
|
const float3 F_l = F_func(Q_l, g_);
|
||||||
const float3 F_r = F_func(Q_r, g_);
|
const float3 F_r = F_func(Q_r, g_);
|
||||||
|
|
||||||
//Note numerical diffusion for 2D here (0.25)
|
//Note numerical diffusion for 2D here (0.25)
|
||||||
return 0.5f*(F_l + F_r) + (Q_l - Q_r) * dx_ / (4.0f*dt_);
|
return 0.5f*(F_l + F_r) + (dx_/(4.0f*dt_))*(Q_l - Q_r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -925,11 +921,11 @@ float3 LxF_2D_flux(const float3 Q_l, const float3 Q_r, const float g_, const flo
|
|||||||
/**
|
/**
|
||||||
* Richtmeyer / Two-step Lax-Wendroff flux (Toro 2001, p 164)
|
* Richtmeyer / Two-step Lax-Wendroff flux (Toro 2001, p 164)
|
||||||
*/
|
*/
|
||||||
float3 LxW2_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
__device__ float3 LxW2_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||||
const float3 F_l = F_func(Q_l, g_);
|
const float3 F_l = F_func(Q_l, g_);
|
||||||
const float3 F_r = F_func(Q_r, g_);
|
const float3 F_r = F_func(Q_r, g_);
|
||||||
|
|
||||||
const float3 Q_lw2 = 0.5f*(Q_l + Q_r) + (F_l - F_r)*dt_/(2.0f*dx_);
|
const float3 Q_lw2 = 0.5f*(Q_l + Q_r) + (dt_/(2.0f*dx_))*(F_l - F_r);
|
||||||
|
|
||||||
return F_func(Q_lw2, g_);
|
return F_func(Q_lw2, g_);
|
||||||
}
|
}
|
||||||
@ -942,11 +938,11 @@ float3 LxW2_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const fl
|
|||||||
/**
|
/**
|
||||||
* Godunovs centered scheme (Toro 2001, p 165)
|
* Godunovs centered scheme (Toro 2001, p 165)
|
||||||
*/
|
*/
|
||||||
float3 GodC_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
__device__ float3 GodC_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||||
const float3 F_l = F_func(Q_l, g_);
|
const float3 F_l = F_func(Q_l, g_);
|
||||||
const float3 F_r = F_func(Q_r, g_);
|
const float3 F_r = F_func(Q_r, g_);
|
||||||
|
|
||||||
const float3 Q_godc = 0.5f*(Q_l + Q_r) + (F_l - F_r)*dt_/dx_;
|
const float3 Q_godc = 0.5f*(Q_l + Q_r) + (dt_/dx_)*(F_l - F_r);
|
||||||
|
|
||||||
return F_func(Q_godc, g_);
|
return F_func(Q_godc, g_);
|
||||||
}
|
}
|
||||||
@ -957,7 +953,7 @@ float3 GodC_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const fl
|
|||||||
/**
|
/**
|
||||||
* First Ordered Centered (Toro 2001, p.163)
|
* First Ordered Centered (Toro 2001, p.163)
|
||||||
*/
|
*/
|
||||||
float3 FORCE_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
__device__ float3 FORCE_1D_flux(const float3 Q_l, const float3 Q_r, const float g_, const float dx_, const float dt_) {
|
||||||
const float3 F_lf = LxF_1D_flux(Q_l, Q_r, g_, dx_, dt_);
|
const float3 F_lf = LxF_1D_flux(Q_l, Q_r, g_, dx_, dt_);
|
||||||
const float3 F_lw2 = LxW2_1D_flux(Q_l, Q_r, g_, dx_, dt_);
|
const float3 F_lw2 = LxW2_1D_flux(Q_l, Q_r, g_, dx_, dt_);
|
||||||
return 0.5f*(F_lf + F_lw2);
|
return 0.5f*(F_lf + F_lw2);
|
3083
WAFExp.ipynb
Normal file
3083
WAFExp.ipynb
Normal file
File diff suppressed because one or more lines are too long
1042
shock1d_ref_nx=1024.csv
Normal file
1042
shock1d_ref_nx=1024.csv
Normal file
File diff suppressed because it is too large
Load Diff
146
shock1d_ref_nx=128.csv
Normal file
146
shock1d_ref_nx=128.csv
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
##############################################################################
|
||||||
|
# Generated by SWASHES version 1.03.00, 2016-01-29
|
||||||
|
##############################################################################
|
||||||
|
# Dimension: 1
|
||||||
|
# Type: 3 (=Dam break)
|
||||||
|
# Domain: 1
|
||||||
|
# Choice: 1 (=on a wet domain without friction (Stoker's solution))
|
||||||
|
##############################################################################
|
||||||
|
# PARAMETERS OF THE SOLUTION
|
||||||
|
#
|
||||||
|
# Length of the domain: 10 meters
|
||||||
|
# Space step: 0.078125 meters
|
||||||
|
# Number of cells: 128
|
||||||
|
# Position of the dam: x=5 meters
|
||||||
|
# Time value: 6 seconds
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
#(i-0.5)*dx h[i] u[i] topo[i] q[i] topo[i]+h[i] Fr[i]=Froude topo[i]+hc[i]
|
||||||
|
0.0390625 0.005 0 0 0 0.005 0 0
|
||||||
|
0.117188 0.005 0 0 0 0.005 0 0
|
||||||
|
0.195312 0.005 0 0 0 0.005 0 0
|
||||||
|
0.273438 0.005 0 0 0 0.005 0 0
|
||||||
|
0.351562 0.005 0 0 0 0.005 0 0
|
||||||
|
0.429688 0.005 0 0 0 0.005 0 0
|
||||||
|
0.507812 0.005 0 0 0 0.005 0 0
|
||||||
|
0.585938 0.005 0 0 0 0.005 0 0
|
||||||
|
0.664062 0.005 0 0 0 0.005 0 0
|
||||||
|
0.742188 0.005 0 0 0 0.005 0 0
|
||||||
|
0.820312 0.005 0 0 0 0.005 0 0
|
||||||
|
0.898438 0.005 0 0 0 0.005 0 0
|
||||||
|
0.976562 0.005 0 0 0 0.005 0 0
|
||||||
|
1.05469 0.005 0 0 0 0.005 0 0
|
||||||
|
1.13281 0.005 0 0 0 0.005 0 0
|
||||||
|
1.21094 0.005 0 0 0 0.005 0 0
|
||||||
|
1.28906 0.005 0 0 0 0.005 0 0
|
||||||
|
1.36719 0.005 0 0 0 0.005 0 0
|
||||||
|
1.44531 0.005 0 0 0 0.005 0 0
|
||||||
|
1.52344 0.005 0 0 0 0.005 0 0
|
||||||
|
1.60156 0.005 0 0 0 0.005 0 0
|
||||||
|
1.67969 0.005 0 0 0 0.005 0 0
|
||||||
|
1.75781 0.005 0 0 0 0.005 0 0
|
||||||
|
1.83594 0.005 0 0 0 0.005 0 0
|
||||||
|
1.91406 0.005 0 0 0 0.005 0 0
|
||||||
|
1.99219 0.005 0 0 0 0.005 0 0
|
||||||
|
2.07031 0.005 0 0 0 0.005 0 0
|
||||||
|
2.14844 0.005 0 0 0 0.005 0 0
|
||||||
|
2.22656 0.005 0 0 0 0.005 0 0
|
||||||
|
2.30469 0.005 0 0 0 0.005 0 0
|
||||||
|
2.38281 0.005 0 0 0 0.005 0 0
|
||||||
|
2.46094 0.005 0 0 0 0.005 0 0
|
||||||
|
2.53906 0.005 0 0 0 0.005 0 0
|
||||||
|
2.61719 0.005 0 0 0 0.005 0 0
|
||||||
|
2.69531 0.005 0 0 0 0.005 0 0
|
||||||
|
2.77344 0.005 0 0 0 0.005 0 0
|
||||||
|
2.85156 0.005 0 0 0 0.005 0 0
|
||||||
|
2.92969 0.005 0 0 0 0.005 0 0
|
||||||
|
3.00781 0.005 0 0 0 0.005 0 0
|
||||||
|
3.08594 0.005 0 0 0 0.005 0 0
|
||||||
|
3.16406 0.005 0 0 0 0.005 0 0
|
||||||
|
3.24219 0.005 0 0 0 0.005 0 0
|
||||||
|
3.32031 0.005 0 0 0 0.005 0 0
|
||||||
|
3.39844 0.005 0 0 0 0.005 0 0
|
||||||
|
3.47656 0.005 0 0 0 0.005 0 0
|
||||||
|
3.55469 0.005 0 0 0 0.005 0 0
|
||||||
|
3.63281 0.005 0 0 0 0.005 0 0
|
||||||
|
3.71094 0.00490073 0.00441906 0 2.16566e-005 0.00490073 0.0201542 0.000362943
|
||||||
|
3.78906 0.00470863 0.0130996 0 6.16813e-005 0.00470863 0.0609504 0.000729255
|
||||||
|
3.86719 0.00452038 0.0217802 0 9.84546e-005 0.00452038 0.103428 0.000996019
|
||||||
|
3.94531 0.00433596 0.0304607 0 0.000132076 0.00433596 0.147694 0.00121151
|
||||||
|
4.02344 0.00415538 0.0391413 0 0.000162647 0.00415538 0.193863 0.0013919
|
||||||
|
4.10156 0.00397865 0.0478218 0 0.000190266 0.00397865 0.242061 0.00154532
|
||||||
|
4.17969 0.00380575 0.0565024 0 0.000215034 0.00380575 0.292423 0.00167667
|
||||||
|
4.25781 0.0036367 0.065183 0 0.000237051 0.0036367 0.345101 0.00178925
|
||||||
|
4.33594 0.00347148 0.0738635 0 0.000256416 0.00347148 0.400256 0.00188541
|
||||||
|
4.41406 0.00331011 0.0825441 0 0.00027323 0.00331011 0.458068 0.00196696
|
||||||
|
4.49219 0.00315257 0.0912246 0 0.000287592 0.00315257 0.518734 0.0020353
|
||||||
|
4.57031 0.00299888 0.0999052 0 0.000299604 0.00299888 0.58247 0.00209158
|
||||||
|
4.64844 0.00284903 0.108586 0 0.000309364 0.00284903 0.649516 0.00213677
|
||||||
|
4.72656 0.00270302 0.117266 0 0.000316973 0.00270302 0.720135 0.00217166
|
||||||
|
4.80469 0.00256085 0.125947 0 0.000322531 0.00256085 0.794623 0.00219697
|
||||||
|
4.88281 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.96094 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.03906 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.11719 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.19531 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.27344 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.35156 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.42969 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.50781 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.58594 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.66406 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.74219 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.82031 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.89844 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.97656 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.05469 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.13281 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.21094 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.28906 0.001 0 0 0 0.001 0 0
|
||||||
|
6.36719 0.001 0 0 0 0.001 0 0
|
||||||
|
6.44531 0.001 0 0 0 0.001 0 0
|
||||||
|
6.52344 0.001 0 0 0 0.001 0 0
|
||||||
|
6.60156 0.001 0 0 0 0.001 0 0
|
||||||
|
6.67969 0.001 0 0 0 0.001 0 0
|
||||||
|
6.75781 0.001 0 0 0 0.001 0 0
|
||||||
|
6.83594 0.001 0 0 0 0.001 0 0
|
||||||
|
6.91406 0.001 0 0 0 0.001 0 0
|
||||||
|
6.99219 0.001 0 0 0 0.001 0 0
|
||||||
|
7.07031 0.001 0 0 0 0.001 0 0
|
||||||
|
7.14844 0.001 0 0 0 0.001 0 0
|
||||||
|
7.22656 0.001 0 0 0 0.001 0 0
|
||||||
|
7.30469 0.001 0 0 0 0.001 0 0
|
||||||
|
7.38281 0.001 0 0 0 0.001 0 0
|
||||||
|
7.46094 0.001 0 0 0 0.001 0 0
|
||||||
|
7.53906 0.001 0 0 0 0.001 0 0
|
||||||
|
7.61719 0.001 0 0 0 0.001 0 0
|
||||||
|
7.69531 0.001 0 0 0 0.001 0 0
|
||||||
|
7.77344 0.001 0 0 0 0.001 0 0
|
||||||
|
7.85156 0.001 0 0 0 0.001 0 0
|
||||||
|
7.92969 0.001 0 0 0 0.001 0 0
|
||||||
|
8.00781 0.001 0 0 0 0.001 0 0
|
||||||
|
8.08594 0.001 0 0 0 0.001 0 0
|
||||||
|
8.16406 0.001 0 0 0 0.001 0 0
|
||||||
|
8.24219 0.001 0 0 0 0.001 0 0
|
||||||
|
8.32031 0.001 0 0 0 0.001 0 0
|
||||||
|
8.39844 0.001 0 0 0 0.001 0 0
|
||||||
|
8.47656 0.001 0 0 0 0.001 0 0
|
||||||
|
8.55469 0.001 0 0 0 0.001 0 0
|
||||||
|
8.63281 0.001 0 0 0 0.001 0 0
|
||||||
|
8.71094 0.001 0 0 0 0.001 0 0
|
||||||
|
8.78906 0.001 0 0 0 0.001 0 0
|
||||||
|
8.86719 0.001 0 0 0 0.001 0 0
|
||||||
|
8.94531 0.001 0 0 0 0.001 0 0
|
||||||
|
9.02344 0.001 0 0 0 0.001 0 0
|
||||||
|
9.10156 0.001 0 0 0 0.001 0 0
|
||||||
|
9.17969 0.001 0 0 0 0.001 0 0
|
||||||
|
9.25781 0.001 0 0 0 0.001 0 0
|
||||||
|
9.33594 0.001 0 0 0 0.001 0 0
|
||||||
|
9.41406 0.001 0 0 0 0.001 0 0
|
||||||
|
9.49219 0.001 0 0 0 0.001 0 0
|
||||||
|
9.57031 0.001 0 0 0 0.001 0 0
|
||||||
|
9.64844 0.001 0 0 0 0.001 0 0
|
||||||
|
9.72656 0.001 0 0 0 0.001 0 0
|
||||||
|
9.80469 0.001 0 0 0 0.001 0 0
|
||||||
|
9.88281 0.001 0 0 0 0.001 0 0
|
||||||
|
9.96094 0.001 0 0 0 0.001 0 0
|
Can't render this file because it has a wrong number of fields in line 18.
|
2066
shock1d_ref_nx=2048.csv
Normal file
2066
shock1d_ref_nx=2048.csv
Normal file
File diff suppressed because it is too large
Load Diff
274
shock1d_ref_nx=256.csv
Normal file
274
shock1d_ref_nx=256.csv
Normal file
@ -0,0 +1,274 @@
|
|||||||
|
##############################################################################
|
||||||
|
# Generated by SWASHES version 1.03.00, 2016-01-29
|
||||||
|
##############################################################################
|
||||||
|
# Dimension: 1
|
||||||
|
# Type: 3 (=Dam break)
|
||||||
|
# Domain: 1
|
||||||
|
# Choice: 1 (=on a wet domain without friction (Stoker's solution))
|
||||||
|
##############################################################################
|
||||||
|
# PARAMETERS OF THE SOLUTION
|
||||||
|
#
|
||||||
|
# Length of the domain: 10 meters
|
||||||
|
# Space step: 0.0390625 meters
|
||||||
|
# Number of cells: 256
|
||||||
|
# Position of the dam: x=5 meters
|
||||||
|
# Time value: 6 seconds
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
#(i-0.5)*dx h[i] u[i] topo[i] q[i] topo[i]+h[i] Fr[i]=Froude topo[i]+hc[i]
|
||||||
|
0.0195312 0.005 0 0 0 0.005 0 0
|
||||||
|
0.0585938 0.005 0 0 0 0.005 0 0
|
||||||
|
0.0976562 0.005 0 0 0 0.005 0 0
|
||||||
|
0.136719 0.005 0 0 0 0.005 0 0
|
||||||
|
0.175781 0.005 0 0 0 0.005 0 0
|
||||||
|
0.214844 0.005 0 0 0 0.005 0 0
|
||||||
|
0.253906 0.005 0 0 0 0.005 0 0
|
||||||
|
0.292969 0.005 0 0 0 0.005 0 0
|
||||||
|
0.332031 0.005 0 0 0 0.005 0 0
|
||||||
|
0.371094 0.005 0 0 0 0.005 0 0
|
||||||
|
0.410156 0.005 0 0 0 0.005 0 0
|
||||||
|
0.449219 0.005 0 0 0 0.005 0 0
|
||||||
|
0.488281 0.005 0 0 0 0.005 0 0
|
||||||
|
0.527344 0.005 0 0 0 0.005 0 0
|
||||||
|
0.566406 0.005 0 0 0 0.005 0 0
|
||||||
|
0.605469 0.005 0 0 0 0.005 0 0
|
||||||
|
0.644531 0.005 0 0 0 0.005 0 0
|
||||||
|
0.683594 0.005 0 0 0 0.005 0 0
|
||||||
|
0.722656 0.005 0 0 0 0.005 0 0
|
||||||
|
0.761719 0.005 0 0 0 0.005 0 0
|
||||||
|
0.800781 0.005 0 0 0 0.005 0 0
|
||||||
|
0.839844 0.005 0 0 0 0.005 0 0
|
||||||
|
0.878906 0.005 0 0 0 0.005 0 0
|
||||||
|
0.917969 0.005 0 0 0 0.005 0 0
|
||||||
|
0.957031 0.005 0 0 0 0.005 0 0
|
||||||
|
0.996094 0.005 0 0 0 0.005 0 0
|
||||||
|
1.03516 0.005 0 0 0 0.005 0 0
|
||||||
|
1.07422 0.005 0 0 0 0.005 0 0
|
||||||
|
1.11328 0.005 0 0 0 0.005 0 0
|
||||||
|
1.15234 0.005 0 0 0 0.005 0 0
|
||||||
|
1.19141 0.005 0 0 0 0.005 0 0
|
||||||
|
1.23047 0.005 0 0 0 0.005 0 0
|
||||||
|
1.26953 0.005 0 0 0 0.005 0 0
|
||||||
|
1.30859 0.005 0 0 0 0.005 0 0
|
||||||
|
1.34766 0.005 0 0 0 0.005 0 0
|
||||||
|
1.38672 0.005 0 0 0 0.005 0 0
|
||||||
|
1.42578 0.005 0 0 0 0.005 0 0
|
||||||
|
1.46484 0.005 0 0 0 0.005 0 0
|
||||||
|
1.50391 0.005 0 0 0 0.005 0 0
|
||||||
|
1.54297 0.005 0 0 0 0.005 0 0
|
||||||
|
1.58203 0.005 0 0 0 0.005 0 0
|
||||||
|
1.62109 0.005 0 0 0 0.005 0 0
|
||||||
|
1.66016 0.005 0 0 0 0.005 0 0
|
||||||
|
1.69922 0.005 0 0 0 0.005 0 0
|
||||||
|
1.73828 0.005 0 0 0 0.005 0 0
|
||||||
|
1.77734 0.005 0 0 0 0.005 0 0
|
||||||
|
1.81641 0.005 0 0 0 0.005 0 0
|
||||||
|
1.85547 0.005 0 0 0 0.005 0 0
|
||||||
|
1.89453 0.005 0 0 0 0.005 0 0
|
||||||
|
1.93359 0.005 0 0 0 0.005 0 0
|
||||||
|
1.97266 0.005 0 0 0 0.005 0 0
|
||||||
|
2.01172 0.005 0 0 0 0.005 0 0
|
||||||
|
2.05078 0.005 0 0 0 0.005 0 0
|
||||||
|
2.08984 0.005 0 0 0 0.005 0 0
|
||||||
|
2.12891 0.005 0 0 0 0.005 0 0
|
||||||
|
2.16797 0.005 0 0 0 0.005 0 0
|
||||||
|
2.20703 0.005 0 0 0 0.005 0 0
|
||||||
|
2.24609 0.005 0 0 0 0.005 0 0
|
||||||
|
2.28516 0.005 0 0 0 0.005 0 0
|
||||||
|
2.32422 0.005 0 0 0 0.005 0 0
|
||||||
|
2.36328 0.005 0 0 0 0.005 0 0
|
||||||
|
2.40234 0.005 0 0 0 0.005 0 0
|
||||||
|
2.44141 0.005 0 0 0 0.005 0 0
|
||||||
|
2.48047 0.005 0 0 0 0.005 0 0
|
||||||
|
2.51953 0.005 0 0 0 0.005 0 0
|
||||||
|
2.55859 0.005 0 0 0 0.005 0 0
|
||||||
|
2.59766 0.005 0 0 0 0.005 0 0
|
||||||
|
2.63672 0.005 0 0 0 0.005 0 0
|
||||||
|
2.67578 0.005 0 0 0 0.005 0 0
|
||||||
|
2.71484 0.005 0 0 0 0.005 0 0
|
||||||
|
2.75391 0.005 0 0 0 0.005 0 0
|
||||||
|
2.79297 0.005 0 0 0 0.005 0 0
|
||||||
|
2.83203 0.005 0 0 0 0.005 0 0
|
||||||
|
2.87109 0.005 0 0 0 0.005 0 0
|
||||||
|
2.91016 0.005 0 0 0 0.005 0 0
|
||||||
|
2.94922 0.005 0 0 0 0.005 0 0
|
||||||
|
2.98828 0.005 0 0 0 0.005 0 0
|
||||||
|
3.02734 0.005 0 0 0 0.005 0 0
|
||||||
|
3.06641 0.005 0 0 0 0.005 0 0
|
||||||
|
3.10547 0.005 0 0 0 0.005 0 0
|
||||||
|
3.14453 0.005 0 0 0 0.005 0 0
|
||||||
|
3.18359 0.005 0 0 0 0.005 0 0
|
||||||
|
3.22266 0.005 0 0 0 0.005 0 0
|
||||||
|
3.26172 0.005 0 0 0 0.005 0 0
|
||||||
|
3.30078 0.005 0 0 0 0.005 0 0
|
||||||
|
3.33984 0.005 0 0 0 0.005 0 0
|
||||||
|
3.37891 0.005 0 0 0 0.005 0 0
|
||||||
|
3.41797 0.005 0 0 0 0.005 0 0
|
||||||
|
3.45703 0.005 0 0 0 0.005 0 0
|
||||||
|
3.49609 0.005 0 0 0 0.005 0 0
|
||||||
|
3.53516 0.005 0 0 0 0.005 0 0
|
||||||
|
3.57422 0.005 0 0 0 0.005 0 0
|
||||||
|
3.61328 0.005 0 0 0 0.005 0 0
|
||||||
|
3.65234 0.005 0 0 0 0.005 0 0
|
||||||
|
3.69141 0.00494936 0.00224893 0 1.11307e-005 0.00494936 0.0102062 0.000232877
|
||||||
|
3.73047 0.00485235 0.0065892 0 3.19731e-005 0.00485235 0.0302011 0.00047058
|
||||||
|
3.76953 0.0047563 0.0109295 0 5.19839e-005 0.0047563 0.0505977 0.000650663
|
||||||
|
3.80859 0.00466121 0.0152698 0 7.11755e-005 0.00466121 0.0714082 0.000802289
|
||||||
|
3.84766 0.00456708 0.01961 0 8.95606e-005 0.00456708 0.0926456 0.000935093
|
||||||
|
3.88672 0.00447391 0.0239503 0 0.000107152 0.00447391 0.114323 0.00105384
|
||||||
|
3.92578 0.0043817 0.0282906 0 0.000123961 0.0043817 0.136454 0.00116136
|
||||||
|
3.96484 0.00429045 0.0326309 0 0.000140001 0.00429045 0.159053 0.0012595
|
||||||
|
4.00391 0.00420017 0.0369711 0 0.000155285 0.00420017 0.182136 0.00134957
|
||||||
|
4.04297 0.00411084 0.0413114 0 0.000169825 0.00411084 0.205717 0.00143255
|
||||||
|
4.08203 0.00402247 0.0456517 0 0.000183633 0.00402247 0.229814 0.00150919
|
||||||
|
4.12109 0.00393506 0.049992 0 0.000196722 0.00393506 0.254443 0.00158008
|
||||||
|
4.16016 0.00384861 0.0543323 0 0.000209104 0.00384861 0.279622 0.0016457
|
||||||
|
4.19922 0.00376313 0.0586725 0 0.000220792 0.00376313 0.30537 0.00170647
|
||||||
|
4.23828 0.0036786 0.0630128 0 0.000231799 0.0036786 0.331706 0.00176272
|
||||||
|
4.27734 0.00359503 0.0673531 0 0.000242137 0.00359503 0.358651 0.00181475
|
||||||
|
4.31641 0.00351242 0.0716934 0 0.000251818 0.00351242 0.386226 0.00186281
|
||||||
|
4.35547 0.00343078 0.0760336 0 0.000260855 0.00343078 0.414453 0.00190711
|
||||||
|
4.39453 0.00335009 0.0803739 0 0.00026926 0.00335009 0.443356 0.00194786
|
||||||
|
4.43359 0.00327036 0.0847142 0 0.000277046 0.00327036 0.472959 0.00198523
|
||||||
|
4.47266 0.0031916 0.0890545 0 0.000284226 0.0031916 0.503289 0.00201939
|
||||||
|
4.51172 0.00311379 0.0933948 0 0.000290812 0.00311379 0.534371 0.00205046
|
||||||
|
4.55078 0.00303694 0.097735 0 0.000296816 0.00303694 0.566236 0.00207859
|
||||||
|
4.58984 0.00296106 0.102075 0 0.000302251 0.00296106 0.598912 0.00210389
|
||||||
|
4.62891 0.00288613 0.106416 0 0.000307129 0.00288613 0.63243 0.00212646
|
||||||
|
4.66797 0.00281217 0.110756 0 0.000311464 0.00281217 0.666825 0.00214642
|
||||||
|
4.70703 0.00273916 0.115096 0 0.000315267 0.00273916 0.70213 0.00216386
|
||||||
|
4.74609 0.00266712 0.119436 0 0.000318551 0.00266712 0.738383 0.00217886
|
||||||
|
4.78516 0.00259603 0.123777 0 0.000321328 0.00259603 0.775621 0.00219151
|
||||||
|
4.82422 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.86328 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.90234 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.94141 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.98047 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.01953 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.05859 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.09766 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.13672 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.17578 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.21484 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.25391 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.29297 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.33203 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.37109 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.41016 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.44922 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.48828 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.52734 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.56641 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.60547 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.64453 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.68359 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.72266 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.76172 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.80078 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.83984 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.87891 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.91797 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.95703 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.99609 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.03516 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.07422 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.11328 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.15234 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.19141 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.23047 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.26953 0.001 0 0 0 0.001 0 0
|
||||||
|
6.30859 0.001 0 0 0 0.001 0 0
|
||||||
|
6.34766 0.001 0 0 0 0.001 0 0
|
||||||
|
6.38672 0.001 0 0 0 0.001 0 0
|
||||||
|
6.42578 0.001 0 0 0 0.001 0 0
|
||||||
|
6.46484 0.001 0 0 0 0.001 0 0
|
||||||
|
6.50391 0.001 0 0 0 0.001 0 0
|
||||||
|
6.54297 0.001 0 0 0 0.001 0 0
|
||||||
|
6.58203 0.001 0 0 0 0.001 0 0
|
||||||
|
6.62109 0.001 0 0 0 0.001 0 0
|
||||||
|
6.66016 0.001 0 0 0 0.001 0 0
|
||||||
|
6.69922 0.001 0 0 0 0.001 0 0
|
||||||
|
6.73828 0.001 0 0 0 0.001 0 0
|
||||||
|
6.77734 0.001 0 0 0 0.001 0 0
|
||||||
|
6.81641 0.001 0 0 0 0.001 0 0
|
||||||
|
6.85547 0.001 0 0 0 0.001 0 0
|
||||||
|
6.89453 0.001 0 0 0 0.001 0 0
|
||||||
|
6.93359 0.001 0 0 0 0.001 0 0
|
||||||
|
6.97266 0.001 0 0 0 0.001 0 0
|
||||||
|
7.01172 0.001 0 0 0 0.001 0 0
|
||||||
|
7.05078 0.001 0 0 0 0.001 0 0
|
||||||
|
7.08984 0.001 0 0 0 0.001 0 0
|
||||||
|
7.12891 0.001 0 0 0 0.001 0 0
|
||||||
|
7.16797 0.001 0 0 0 0.001 0 0
|
||||||
|
7.20703 0.001 0 0 0 0.001 0 0
|
||||||
|
7.24609 0.001 0 0 0 0.001 0 0
|
||||||
|
7.28516 0.001 0 0 0 0.001 0 0
|
||||||
|
7.32422 0.001 0 0 0 0.001 0 0
|
||||||
|
7.36328 0.001 0 0 0 0.001 0 0
|
||||||
|
7.40234 0.001 0 0 0 0.001 0 0
|
||||||
|
7.44141 0.001 0 0 0 0.001 0 0
|
||||||
|
7.48047 0.001 0 0 0 0.001 0 0
|
||||||
|
7.51953 0.001 0 0 0 0.001 0 0
|
||||||
|
7.55859 0.001 0 0 0 0.001 0 0
|
||||||
|
7.59766 0.001 0 0 0 0.001 0 0
|
||||||
|
7.63672 0.001 0 0 0 0.001 0 0
|
||||||
|
7.67578 0.001 0 0 0 0.001 0 0
|
||||||
|
7.71484 0.001 0 0 0 0.001 0 0
|
||||||
|
7.75391 0.001 0 0 0 0.001 0 0
|
||||||
|
7.79297 0.001 0 0 0 0.001 0 0
|
||||||
|
7.83203 0.001 0 0 0 0.001 0 0
|
||||||
|
7.87109 0.001 0 0 0 0.001 0 0
|
||||||
|
7.91016 0.001 0 0 0 0.001 0 0
|
||||||
|
7.94922 0.001 0 0 0 0.001 0 0
|
||||||
|
7.98828 0.001 0 0 0 0.001 0 0
|
||||||
|
8.02734 0.001 0 0 0 0.001 0 0
|
||||||
|
8.06641 0.001 0 0 0 0.001 0 0
|
||||||
|
8.10547 0.001 0 0 0 0.001 0 0
|
||||||
|
8.14453 0.001 0 0 0 0.001 0 0
|
||||||
|
8.18359 0.001 0 0 0 0.001 0 0
|
||||||
|
8.22266 0.001 0 0 0 0.001 0 0
|
||||||
|
8.26172 0.001 0 0 0 0.001 0 0
|
||||||
|
8.30078 0.001 0 0 0 0.001 0 0
|
||||||
|
8.33984 0.001 0 0 0 0.001 0 0
|
||||||
|
8.37891 0.001 0 0 0 0.001 0 0
|
||||||
|
8.41797 0.001 0 0 0 0.001 0 0
|
||||||
|
8.45703 0.001 0 0 0 0.001 0 0
|
||||||
|
8.49609 0.001 0 0 0 0.001 0 0
|
||||||
|
8.53516 0.001 0 0 0 0.001 0 0
|
||||||
|
8.57422 0.001 0 0 0 0.001 0 0
|
||||||
|
8.61328 0.001 0 0 0 0.001 0 0
|
||||||
|
8.65234 0.001 0 0 0 0.001 0 0
|
||||||
|
8.69141 0.001 0 0 0 0.001 0 0
|
||||||
|
8.73047 0.001 0 0 0 0.001 0 0
|
||||||
|
8.76953 0.001 0 0 0 0.001 0 0
|
||||||
|
8.80859 0.001 0 0 0 0.001 0 0
|
||||||
|
8.84766 0.001 0 0 0 0.001 0 0
|
||||||
|
8.88672 0.001 0 0 0 0.001 0 0
|
||||||
|
8.92578 0.001 0 0 0 0.001 0 0
|
||||||
|
8.96484 0.001 0 0 0 0.001 0 0
|
||||||
|
9.00391 0.001 0 0 0 0.001 0 0
|
||||||
|
9.04297 0.001 0 0 0 0.001 0 0
|
||||||
|
9.08203 0.001 0 0 0 0.001 0 0
|
||||||
|
9.12109 0.001 0 0 0 0.001 0 0
|
||||||
|
9.16016 0.001 0 0 0 0.001 0 0
|
||||||
|
9.19922 0.001 0 0 0 0.001 0 0
|
||||||
|
9.23828 0.001 0 0 0 0.001 0 0
|
||||||
|
9.27734 0.001 0 0 0 0.001 0 0
|
||||||
|
9.31641 0.001 0 0 0 0.001 0 0
|
||||||
|
9.35547 0.001 0 0 0 0.001 0 0
|
||||||
|
9.39453 0.001 0 0 0 0.001 0 0
|
||||||
|
9.43359 0.001 0 0 0 0.001 0 0
|
||||||
|
9.47266 0.001 0 0 0 0.001 0 0
|
||||||
|
9.51172 0.001 0 0 0 0.001 0 0
|
||||||
|
9.55078 0.001 0 0 0 0.001 0 0
|
||||||
|
9.58984 0.001 0 0 0 0.001 0 0
|
||||||
|
9.62891 0.001 0 0 0 0.001 0 0
|
||||||
|
9.66797 0.001 0 0 0 0.001 0 0
|
||||||
|
9.70703 0.001 0 0 0 0.001 0 0
|
||||||
|
9.74609 0.001 0 0 0 0.001 0 0
|
||||||
|
9.78516 0.001 0 0 0 0.001 0 0
|
||||||
|
9.82422 0.001 0 0 0 0.001 0 0
|
||||||
|
9.86328 0.001 0 0 0 0.001 0 0
|
||||||
|
9.90234 0.001 0 0 0 0.001 0 0
|
||||||
|
9.94141 0.001 0 0 0 0.001 0 0
|
||||||
|
9.98047 0.001 0 0 0 0.001 0 0
|
Can't render this file because it has a wrong number of fields in line 18.
|
4114
shock1d_ref_nx=4096.csv
Normal file
4114
shock1d_ref_nx=4096.csv
Normal file
File diff suppressed because it is too large
Load Diff
530
shock1d_ref_nx=512.csv
Normal file
530
shock1d_ref_nx=512.csv
Normal file
@ -0,0 +1,530 @@
|
|||||||
|
##############################################################################
|
||||||
|
# Generated by SWASHES version 1.03.00, 2016-01-29
|
||||||
|
##############################################################################
|
||||||
|
# Dimension: 1
|
||||||
|
# Type: 3 (=Dam break)
|
||||||
|
# Domain: 1
|
||||||
|
# Choice: 1 (=on a wet domain without friction (Stoker's solution))
|
||||||
|
##############################################################################
|
||||||
|
# PARAMETERS OF THE SOLUTION
|
||||||
|
#
|
||||||
|
# Length of the domain: 10 meters
|
||||||
|
# Space step: 0.0195312 meters
|
||||||
|
# Number of cells: 512
|
||||||
|
# Position of the dam: x=5 meters
|
||||||
|
# Time value: 6 seconds
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
#(i-0.5)*dx h[i] u[i] topo[i] q[i] topo[i]+h[i] Fr[i]=Froude topo[i]+hc[i]
|
||||||
|
0.00976562 0.005 0 0 0 0.005 0 0
|
||||||
|
0.0292969 0.005 0 0 0 0.005 0 0
|
||||||
|
0.0488281 0.005 0 0 0 0.005 0 0
|
||||||
|
0.0683594 0.005 0 0 0 0.005 0 0
|
||||||
|
0.0878906 0.005 0 0 0 0.005 0 0
|
||||||
|
0.107422 0.005 0 0 0 0.005 0 0
|
||||||
|
0.126953 0.005 0 0 0 0.005 0 0
|
||||||
|
0.146484 0.005 0 0 0 0.005 0 0
|
||||||
|
0.166016 0.005 0 0 0 0.005 0 0
|
||||||
|
0.185547 0.005 0 0 0 0.005 0 0
|
||||||
|
0.205078 0.005 0 0 0 0.005 0 0
|
||||||
|
0.224609 0.005 0 0 0 0.005 0 0
|
||||||
|
0.244141 0.005 0 0 0 0.005 0 0
|
||||||
|
0.263672 0.005 0 0 0 0.005 0 0
|
||||||
|
0.283203 0.005 0 0 0 0.005 0 0
|
||||||
|
0.302734 0.005 0 0 0 0.005 0 0
|
||||||
|
0.322266 0.005 0 0 0 0.005 0 0
|
||||||
|
0.341797 0.005 0 0 0 0.005 0 0
|
||||||
|
0.361328 0.005 0 0 0 0.005 0 0
|
||||||
|
0.380859 0.005 0 0 0 0.005 0 0
|
||||||
|
0.400391 0.005 0 0 0 0.005 0 0
|
||||||
|
0.419922 0.005 0 0 0 0.005 0 0
|
||||||
|
0.439453 0.005 0 0 0 0.005 0 0
|
||||||
|
0.458984 0.005 0 0 0 0.005 0 0
|
||||||
|
0.478516 0.005 0 0 0 0.005 0 0
|
||||||
|
0.498047 0.005 0 0 0 0.005 0 0
|
||||||
|
0.517578 0.005 0 0 0 0.005 0 0
|
||||||
|
0.537109 0.005 0 0 0 0.005 0 0
|
||||||
|
0.556641 0.005 0 0 0 0.005 0 0
|
||||||
|
0.576172 0.005 0 0 0 0.005 0 0
|
||||||
|
0.595703 0.005 0 0 0 0.005 0 0
|
||||||
|
0.615234 0.005 0 0 0 0.005 0 0
|
||||||
|
0.634766 0.005 0 0 0 0.005 0 0
|
||||||
|
0.654297 0.005 0 0 0 0.005 0 0
|
||||||
|
0.673828 0.005 0 0 0 0.005 0 0
|
||||||
|
0.693359 0.005 0 0 0 0.005 0 0
|
||||||
|
0.712891 0.005 0 0 0 0.005 0 0
|
||||||
|
0.732422 0.005 0 0 0 0.005 0 0
|
||||||
|
0.751953 0.005 0 0 0 0.005 0 0
|
||||||
|
0.771484 0.005 0 0 0 0.005 0 0
|
||||||
|
0.791016 0.005 0 0 0 0.005 0 0
|
||||||
|
0.810547 0.005 0 0 0 0.005 0 0
|
||||||
|
0.830078 0.005 0 0 0 0.005 0 0
|
||||||
|
0.849609 0.005 0 0 0 0.005 0 0
|
||||||
|
0.869141 0.005 0 0 0 0.005 0 0
|
||||||
|
0.888672 0.005 0 0 0 0.005 0 0
|
||||||
|
0.908203 0.005 0 0 0 0.005 0 0
|
||||||
|
0.927734 0.005 0 0 0 0.005 0 0
|
||||||
|
0.947266 0.005 0 0 0 0.005 0 0
|
||||||
|
0.966797 0.005 0 0 0 0.005 0 0
|
||||||
|
0.986328 0.005 0 0 0 0.005 0 0
|
||||||
|
1.00586 0.005 0 0 0 0.005 0 0
|
||||||
|
1.02539 0.005 0 0 0 0.005 0 0
|
||||||
|
1.04492 0.005 0 0 0 0.005 0 0
|
||||||
|
1.06445 0.005 0 0 0 0.005 0 0
|
||||||
|
1.08398 0.005 0 0 0 0.005 0 0
|
||||||
|
1.10352 0.005 0 0 0 0.005 0 0
|
||||||
|
1.12305 0.005 0 0 0 0.005 0 0
|
||||||
|
1.14258 0.005 0 0 0 0.005 0 0
|
||||||
|
1.16211 0.005 0 0 0 0.005 0 0
|
||||||
|
1.18164 0.005 0 0 0 0.005 0 0
|
||||||
|
1.20117 0.005 0 0 0 0.005 0 0
|
||||||
|
1.2207 0.005 0 0 0 0.005 0 0
|
||||||
|
1.24023 0.005 0 0 0 0.005 0 0
|
||||||
|
1.25977 0.005 0 0 0 0.005 0 0
|
||||||
|
1.2793 0.005 0 0 0 0.005 0 0
|
||||||
|
1.29883 0.005 0 0 0 0.005 0 0
|
||||||
|
1.31836 0.005 0 0 0 0.005 0 0
|
||||||
|
1.33789 0.005 0 0 0 0.005 0 0
|
||||||
|
1.35742 0.005 0 0 0 0.005 0 0
|
||||||
|
1.37695 0.005 0 0 0 0.005 0 0
|
||||||
|
1.39648 0.005 0 0 0 0.005 0 0
|
||||||
|
1.41602 0.005 0 0 0 0.005 0 0
|
||||||
|
1.43555 0.005 0 0 0 0.005 0 0
|
||||||
|
1.45508 0.005 0 0 0 0.005 0 0
|
||||||
|
1.47461 0.005 0 0 0 0.005 0 0
|
||||||
|
1.49414 0.005 0 0 0 0.005 0 0
|
||||||
|
1.51367 0.005 0 0 0 0.005 0 0
|
||||||
|
1.5332 0.005 0 0 0 0.005 0 0
|
||||||
|
1.55273 0.005 0 0 0 0.005 0 0
|
||||||
|
1.57227 0.005 0 0 0 0.005 0 0
|
||||||
|
1.5918 0.005 0 0 0 0.005 0 0
|
||||||
|
1.61133 0.005 0 0 0 0.005 0 0
|
||||||
|
1.63086 0.005 0 0 0 0.005 0 0
|
||||||
|
1.65039 0.005 0 0 0 0.005 0 0
|
||||||
|
1.66992 0.005 0 0 0 0.005 0 0
|
||||||
|
1.68945 0.005 0 0 0 0.005 0 0
|
||||||
|
1.70898 0.005 0 0 0 0.005 0 0
|
||||||
|
1.72852 0.005 0 0 0 0.005 0 0
|
||||||
|
1.74805 0.005 0 0 0 0.005 0 0
|
||||||
|
1.76758 0.005 0 0 0 0.005 0 0
|
||||||
|
1.78711 0.005 0 0 0 0.005 0 0
|
||||||
|
1.80664 0.005 0 0 0 0.005 0 0
|
||||||
|
1.82617 0.005 0 0 0 0.005 0 0
|
||||||
|
1.8457 0.005 0 0 0 0.005 0 0
|
||||||
|
1.86523 0.005 0 0 0 0.005 0 0
|
||||||
|
1.88477 0.005 0 0 0 0.005 0 0
|
||||||
|
1.9043 0.005 0 0 0 0.005 0 0
|
||||||
|
1.92383 0.005 0 0 0 0.005 0 0
|
||||||
|
1.94336 0.005 0 0 0 0.005 0 0
|
||||||
|
1.96289 0.005 0 0 0 0.005 0 0
|
||||||
|
1.98242 0.005 0 0 0 0.005 0 0
|
||||||
|
2.00195 0.005 0 0 0 0.005 0 0
|
||||||
|
2.02148 0.005 0 0 0 0.005 0 0
|
||||||
|
2.04102 0.005 0 0 0 0.005 0 0
|
||||||
|
2.06055 0.005 0 0 0 0.005 0 0
|
||||||
|
2.08008 0.005 0 0 0 0.005 0 0
|
||||||
|
2.09961 0.005 0 0 0 0.005 0 0
|
||||||
|
2.11914 0.005 0 0 0 0.005 0 0
|
||||||
|
2.13867 0.005 0 0 0 0.005 0 0
|
||||||
|
2.1582 0.005 0 0 0 0.005 0 0
|
||||||
|
2.17773 0.005 0 0 0 0.005 0 0
|
||||||
|
2.19727 0.005 0 0 0 0.005 0 0
|
||||||
|
2.2168 0.005 0 0 0 0.005 0 0
|
||||||
|
2.23633 0.005 0 0 0 0.005 0 0
|
||||||
|
2.25586 0.005 0 0 0 0.005 0 0
|
||||||
|
2.27539 0.005 0 0 0 0.005 0 0
|
||||||
|
2.29492 0.005 0 0 0 0.005 0 0
|
||||||
|
2.31445 0.005 0 0 0 0.005 0 0
|
||||||
|
2.33398 0.005 0 0 0 0.005 0 0
|
||||||
|
2.35352 0.005 0 0 0 0.005 0 0
|
||||||
|
2.37305 0.005 0 0 0 0.005 0 0
|
||||||
|
2.39258 0.005 0 0 0 0.005 0 0
|
||||||
|
2.41211 0.005 0 0 0 0.005 0 0
|
||||||
|
2.43164 0.005 0 0 0 0.005 0 0
|
||||||
|
2.45117 0.005 0 0 0 0.005 0 0
|
||||||
|
2.4707 0.005 0 0 0 0.005 0 0
|
||||||
|
2.49023 0.005 0 0 0 0.005 0 0
|
||||||
|
2.50977 0.005 0 0 0 0.005 0 0
|
||||||
|
2.5293 0.005 0 0 0 0.005 0 0
|
||||||
|
2.54883 0.005 0 0 0 0.005 0 0
|
||||||
|
2.56836 0.005 0 0 0 0.005 0 0
|
||||||
|
2.58789 0.005 0 0 0 0.005 0 0
|
||||||
|
2.60742 0.005 0 0 0 0.005 0 0
|
||||||
|
2.62695 0.005 0 0 0 0.005 0 0
|
||||||
|
2.64648 0.005 0 0 0 0.005 0 0
|
||||||
|
2.66602 0.005 0 0 0 0.005 0 0
|
||||||
|
2.68555 0.005 0 0 0 0.005 0 0
|
||||||
|
2.70508 0.005 0 0 0 0.005 0 0
|
||||||
|
2.72461 0.005 0 0 0 0.005 0 0
|
||||||
|
2.74414 0.005 0 0 0 0.005 0 0
|
||||||
|
2.76367 0.005 0 0 0 0.005 0 0
|
||||||
|
2.7832 0.005 0 0 0 0.005 0 0
|
||||||
|
2.80273 0.005 0 0 0 0.005 0 0
|
||||||
|
2.82227 0.005 0 0 0 0.005 0 0
|
||||||
|
2.8418 0.005 0 0 0 0.005 0 0
|
||||||
|
2.86133 0.005 0 0 0 0.005 0 0
|
||||||
|
2.88086 0.005 0 0 0 0.005 0 0
|
||||||
|
2.90039 0.005 0 0 0 0.005 0 0
|
||||||
|
2.91992 0.005 0 0 0 0.005 0 0
|
||||||
|
2.93945 0.005 0 0 0 0.005 0 0
|
||||||
|
2.95898 0.005 0 0 0 0.005 0 0
|
||||||
|
2.97852 0.005 0 0 0 0.005 0 0
|
||||||
|
2.99805 0.005 0 0 0 0.005 0 0
|
||||||
|
3.01758 0.005 0 0 0 0.005 0 0
|
||||||
|
3.03711 0.005 0 0 0 0.005 0 0
|
||||||
|
3.05664 0.005 0 0 0 0.005 0 0
|
||||||
|
3.07617 0.005 0 0 0 0.005 0 0
|
||||||
|
3.0957 0.005 0 0 0 0.005 0 0
|
||||||
|
3.11523 0.005 0 0 0 0.005 0 0
|
||||||
|
3.13477 0.005 0 0 0 0.005 0 0
|
||||||
|
3.1543 0.005 0 0 0 0.005 0 0
|
||||||
|
3.17383 0.005 0 0 0 0.005 0 0
|
||||||
|
3.19336 0.005 0 0 0 0.005 0 0
|
||||||
|
3.21289 0.005 0 0 0 0.005 0 0
|
||||||
|
3.23242 0.005 0 0 0 0.005 0 0
|
||||||
|
3.25195 0.005 0 0 0 0.005 0 0
|
||||||
|
3.27148 0.005 0 0 0 0.005 0 0
|
||||||
|
3.29102 0.005 0 0 0 0.005 0 0
|
||||||
|
3.31055 0.005 0 0 0 0.005 0 0
|
||||||
|
3.33008 0.005 0 0 0 0.005 0 0
|
||||||
|
3.34961 0.005 0 0 0 0.005 0 0
|
||||||
|
3.36914 0.005 0 0 0 0.005 0 0
|
||||||
|
3.38867 0.005 0 0 0 0.005 0 0
|
||||||
|
3.4082 0.005 0 0 0 0.005 0 0
|
||||||
|
3.42773 0.005 0 0 0 0.005 0 0
|
||||||
|
3.44727 0.005 0 0 0 0.005 0 0
|
||||||
|
3.4668 0.005 0 0 0 0.005 0 0
|
||||||
|
3.48633 0.005 0 0 0 0.005 0 0
|
||||||
|
3.50586 0.005 0 0 0 0.005 0 0
|
||||||
|
3.52539 0.005 0 0 0 0.005 0 0
|
||||||
|
3.54492 0.005 0 0 0 0.005 0 0
|
||||||
|
3.56445 0.005 0 0 0 0.005 0 0
|
||||||
|
3.58398 0.005 0 0 0 0.005 0 0
|
||||||
|
3.60352 0.005 0 0 0 0.005 0 0
|
||||||
|
3.62305 0.005 0 0 0 0.005 0 0
|
||||||
|
3.64258 0.005 0 0 0 0.005 0 0
|
||||||
|
3.66211 0.005 0 0 0 0.005 0 0
|
||||||
|
3.68164 0.00497376 0.00116386 0 5.78874e-006 0.00497376 0.00526893 0.000150603
|
||||||
|
3.70117 0.00492501 0.00333399 0 1.642e-005 0.00492501 0.0151679 0.000301781
|
||||||
|
3.7207 0.00487651 0.00550413 0 2.6841e-005 0.00487651 0.0251652 0.00041877
|
||||||
|
3.74023 0.00482825 0.00767427 0 3.70533e-005 0.00482825 0.0352621 0.000519192
|
||||||
|
3.75977 0.00478022 0.00984441 0 4.70585e-005 0.00478022 0.0454602 0.000608885
|
||||||
|
3.7793 0.00473244 0.0120146 0 5.68581e-005 0.00473244 0.055761 0.000690725
|
||||||
|
3.79883 0.00468489 0.0141847 0 6.64537e-005 0.00468489 0.0661661 0.000766402
|
||||||
|
3.81836 0.00463759 0.0163548 0 7.58469e-005 0.00463759 0.0766771 0.00083702
|
||||||
|
3.83789 0.00459052 0.018525 0 8.50393e-005 0.00459052 0.0872955 0.000903351
|
||||||
|
3.85742 0.0045437 0.0206951 0 9.40323e-005 0.0045437 0.0980231 0.000965966
|
||||||
|
3.87695 0.00449711 0.0228652 0 0.000102828 0.00449711 0.108862 0.0010253
|
||||||
|
3.89648 0.00445077 0.0250354 0 0.000111427 0.00445077 0.119813 0.00108169
|
||||||
|
3.91602 0.00440467 0.0272055 0 0.000119831 0.00440467 0.130878 0.00113542
|
||||||
|
3.93555 0.0043588 0.0293757 0 0.000128043 0.0043588 0.142059 0.00118672
|
||||||
|
3.95508 0.00431318 0.0315458 0 0.000136063 0.00431318 0.153359 0.00123577
|
||||||
|
3.97461 0.00426779 0.0337159 0 0.000143893 0.00426779 0.164778 0.00128273
|
||||||
|
3.99414 0.00422265 0.0358861 0 0.000151534 0.00422265 0.176319 0.00132775
|
||||||
|
4.01367 0.00417774 0.0380562 0 0.000158989 0.00417774 0.187984 0.00137095
|
||||||
|
4.0332 0.00413308 0.0402264 0 0.000166259 0.00413308 0.199774 0.00141243
|
||||||
|
4.05273 0.00408866 0.0423965 0 0.000173345 0.00408866 0.211692 0.00145228
|
||||||
|
4.07227 0.00404447 0.0445666 0 0.000180248 0.00404447 0.22374 0.00149059
|
||||||
|
4.0918 0.00400053 0.0467368 0 0.000186972 0.00400053 0.23592 0.00152743
|
||||||
|
4.11133 0.00395682 0.0489069 0 0.000193516 0.00395682 0.248235 0.00156287
|
||||||
|
4.13086 0.00391336 0.0510771 0 0.000199883 0.00391336 0.260685 0.00159696
|
||||||
|
4.15039 0.00387014 0.0532472 0 0.000206074 0.00387014 0.273274 0.00162977
|
||||||
|
4.16992 0.00382715 0.0554173 0 0.000212091 0.00382715 0.286005 0.00166134
|
||||||
|
4.18945 0.00378441 0.0575875 0 0.000217935 0.00378441 0.298878 0.00169172
|
||||||
|
4.20898 0.0037419 0.0597576 0 0.000223607 0.0037419 0.311898 0.00172095
|
||||||
|
4.22852 0.00369964 0.0619277 0 0.00022911 0.00369964 0.325066 0.00174907
|
||||||
|
4.24805 0.00365762 0.0640979 0 0.000234446 0.00365762 0.338384 0.00177612
|
||||||
|
4.26758 0.00361583 0.066268 0 0.000239614 0.00361583 0.351856 0.00180213
|
||||||
|
4.28711 0.00357429 0.0684382 0 0.000244618 0.00357429 0.365484 0.00182713
|
||||||
|
4.30664 0.00353299 0.0706083 0 0.000249458 0.00353299 0.379272 0.00185115
|
||||||
|
4.32617 0.00349192 0.0727784 0 0.000254137 0.00349192 0.39322 0.00187423
|
||||||
|
4.3457 0.0034511 0.0749486 0 0.000258655 0.0034511 0.407334 0.00189638
|
||||||
|
4.36523 0.00341052 0.0771187 0 0.000263015 0.00341052 0.421614 0.00191762
|
||||||
|
4.38477 0.00337017 0.0792889 0 0.000267217 0.00337017 0.436065 0.001938
|
||||||
|
4.4043 0.00333007 0.081459 0 0.000271264 0.00333007 0.45069 0.00195752
|
||||||
|
4.42383 0.00329021 0.0836291 0 0.000275157 0.00329021 0.465491 0.0019762
|
||||||
|
4.44336 0.00325058 0.0857993 0 0.000278898 0.00325058 0.480472 0.00199407
|
||||||
|
4.46289 0.0032112 0.0879694 0 0.000282487 0.0032112 0.495637 0.00201114
|
||||||
|
4.48242 0.00317206 0.0901396 0 0.000285928 0.00317206 0.510988 0.00202744
|
||||||
|
4.50195 0.00313315 0.0923097 0 0.00028922 0.00313315 0.526529 0.00204297
|
||||||
|
4.52148 0.00309449 0.0944798 0 0.000292367 0.00309449 0.542263 0.00205776
|
||||||
|
4.54102 0.00305607 0.09665 0 0.000295369 0.00305607 0.558195 0.00207183
|
||||||
|
4.56055 0.00301788 0.0988201 0 0.000298228 0.00301788 0.574327 0.00208517
|
||||||
|
4.58008 0.00297994 0.10099 0 0.000300945 0.00297994 0.590665 0.00209782
|
||||||
|
4.59961 0.00294224 0.10316 0 0.000303522 0.00294224 0.607211 0.00210978
|
||||||
|
4.61914 0.00290477 0.105331 0 0.000305961 0.00290477 0.62397 0.00212107
|
||||||
|
4.63867 0.00286755 0.107501 0 0.000308264 0.00286755 0.640945 0.0021317
|
||||||
|
4.6582 0.00283057 0.109671 0 0.000310431 0.00283057 0.658142 0.00214167
|
||||||
|
4.67773 0.00279383 0.111841 0 0.000312464 0.00279383 0.675564 0.00215102
|
||||||
|
4.69727 0.00275732 0.114011 0 0.000314365 0.00275732 0.693216 0.00215973
|
||||||
|
4.7168 0.00272106 0.116181 0 0.000316136 0.00272106 0.711103 0.00216784
|
||||||
|
4.73633 0.00268504 0.118351 0 0.000317778 0.00268504 0.729228 0.00217533
|
||||||
|
4.75586 0.00264925 0.120521 0 0.000319292 0.00264925 0.747598 0.00218224
|
||||||
|
4.77539 0.00261371 0.122692 0 0.00032068 0.00261371 0.766217 0.00218856
|
||||||
|
4.79492 0.00257841 0.124862 0 0.000321945 0.00257841 0.785089 0.00219431
|
||||||
|
4.81445 0.00254335 0.127032 0 0.000323086 0.00254335 0.804221 0.00219949
|
||||||
|
4.83398 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.85352 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.87305 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.89258 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.91211 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.93164 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.95117 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.9707 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
4.99023 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.00977 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.0293 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.04883 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.06836 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.08789 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.10742 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.12695 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.14648 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.16602 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.18555 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.20508 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.22461 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.24414 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.26367 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.2832 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.30273 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.32227 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.3418 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.36133 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.38086 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.40039 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.41992 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.43945 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.45898 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.47852 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.49805 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.51758 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.53711 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.55664 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.57617 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.5957 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.61523 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.63477 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.6543 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.67383 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.69336 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.71289 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.73242 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.75195 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.77148 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.79102 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.81055 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.83008 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.84961 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.86914 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.88867 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.9082 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.92773 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.94727 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.9668 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
5.98633 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.00586 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.02539 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.04492 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.06445 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.08398 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.10352 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.12305 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.14258 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.16211 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.18164 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.20117 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.2207 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.24023 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.25977 0.00253936 0.127279 0 0.000323208 0.00253936 0.806419 0.00220005
|
||||||
|
6.2793 0.001 0 0 0 0.001 0 0
|
||||||
|
6.29883 0.001 0 0 0 0.001 0 0
|
||||||
|
6.31836 0.001 0 0 0 0.001 0 0
|
||||||
|
6.33789 0.001 0 0 0 0.001 0 0
|
||||||
|
6.35742 0.001 0 0 0 0.001 0 0
|
||||||
|
6.37695 0.001 0 0 0 0.001 0 0
|
||||||
|
6.39648 0.001 0 0 0 0.001 0 0
|
||||||
|
6.41602 0.001 0 0 0 0.001 0 0
|
||||||
|
6.43555 0.001 0 0 0 0.001 0 0
|
||||||
|
6.45508 0.001 0 0 0 0.001 0 0
|
||||||
|
6.47461 0.001 0 0 0 0.001 0 0
|
||||||
|
6.49414 0.001 0 0 0 0.001 0 0
|
||||||
|
6.51367 0.001 0 0 0 0.001 0 0
|
||||||
|
6.5332 0.001 0 0 0 0.001 0 0
|
||||||
|
6.55273 0.001 0 0 0 0.001 0 0
|
||||||
|
6.57227 0.001 0 0 0 0.001 0 0
|
||||||
|
6.5918 0.001 0 0 0 0.001 0 0
|
||||||
|
6.61133 0.001 0 0 0 0.001 0 0
|
||||||
|
6.63086 0.001 0 0 0 0.001 0 0
|
||||||
|
6.65039 0.001 0 0 0 0.001 0 0
|
||||||
|
6.66992 0.001 0 0 0 0.001 0 0
|
||||||
|
6.68945 0.001 0 0 0 0.001 0 0
|
||||||
|
6.70898 0.001 0 0 0 0.001 0 0
|
||||||
|
6.72852 0.001 0 0 0 0.001 0 0
|
||||||
|
6.74805 0.001 0 0 0 0.001 0 0
|
||||||
|
6.76758 0.001 0 0 0 0.001 0 0
|
||||||
|
6.78711 0.001 0 0 0 0.001 0 0
|
||||||
|
6.80664 0.001 0 0 0 0.001 0 0
|
||||||
|
6.82617 0.001 0 0 0 0.001 0 0
|
||||||
|
6.8457 0.001 0 0 0 0.001 0 0
|
||||||
|
6.86523 0.001 0 0 0 0.001 0 0
|
||||||
|
6.88477 0.001 0 0 0 0.001 0 0
|
||||||
|
6.9043 0.001 0 0 0 0.001 0 0
|
||||||
|
6.92383 0.001 0 0 0 0.001 0 0
|
||||||
|
6.94336 0.001 0 0 0 0.001 0 0
|
||||||
|
6.96289 0.001 0 0 0 0.001 0 0
|
||||||
|
6.98242 0.001 0 0 0 0.001 0 0
|
||||||
|
7.00195 0.001 0 0 0 0.001 0 0
|
||||||
|
7.02148 0.001 0 0 0 0.001 0 0
|
||||||
|
7.04102 0.001 0 0 0 0.001 0 0
|
||||||
|
7.06055 0.001 0 0 0 0.001 0 0
|
||||||
|
7.08008 0.001 0 0 0 0.001 0 0
|
||||||
|
7.09961 0.001 0 0 0 0.001 0 0
|
||||||
|
7.11914 0.001 0 0 0 0.001 0 0
|
||||||
|
7.13867 0.001 0 0 0 0.001 0 0
|
||||||
|
7.1582 0.001 0 0 0 0.001 0 0
|
||||||
|
7.17773 0.001 0 0 0 0.001 0 0
|
||||||
|
7.19727 0.001 0 0 0 0.001 0 0
|
||||||
|
7.2168 0.001 0 0 0 0.001 0 0
|
||||||
|
7.23633 0.001 0 0 0 0.001 0 0
|
||||||
|
7.25586 0.001 0 0 0 0.001 0 0
|
||||||
|
7.27539 0.001 0 0 0 0.001 0 0
|
||||||
|
7.29492 0.001 0 0 0 0.001 0 0
|
||||||
|
7.31445 0.001 0 0 0 0.001 0 0
|
||||||
|
7.33398 0.001 0 0 0 0.001 0 0
|
||||||
|
7.35352 0.001 0 0 0 0.001 0 0
|
||||||
|
7.37305 0.001 0 0 0 0.001 0 0
|
||||||
|
7.39258 0.001 0 0 0 0.001 0 0
|
||||||
|
7.41211 0.001 0 0 0 0.001 0 0
|
||||||
|
7.43164 0.001 0 0 0 0.001 0 0
|
||||||
|
7.45117 0.001 0 0 0 0.001 0 0
|
||||||
|
7.4707 0.001 0 0 0 0.001 0 0
|
||||||
|
7.49023 0.001 0 0 0 0.001 0 0
|
||||||
|
7.50977 0.001 0 0 0 0.001 0 0
|
||||||
|
7.5293 0.001 0 0 0 0.001 0 0
|
||||||
|
7.54883 0.001 0 0 0 0.001 0 0
|
||||||
|
7.56836 0.001 0 0 0 0.001 0 0
|
||||||
|
7.58789 0.001 0 0 0 0.001 0 0
|
||||||
|
7.60742 0.001 0 0 0 0.001 0 0
|
||||||
|
7.62695 0.001 0 0 0 0.001 0 0
|
||||||
|
7.64648 0.001 0 0 0 0.001 0 0
|
||||||
|
7.66602 0.001 0 0 0 0.001 0 0
|
||||||
|
7.68555 0.001 0 0 0 0.001 0 0
|
||||||
|
7.70508 0.001 0 0 0 0.001 0 0
|
||||||
|
7.72461 0.001 0 0 0 0.001 0 0
|
||||||
|
7.74414 0.001 0 0 0 0.001 0 0
|
||||||
|
7.76367 0.001 0 0 0 0.001 0 0
|
||||||
|
7.7832 0.001 0 0 0 0.001 0 0
|
||||||
|
7.80273 0.001 0 0 0 0.001 0 0
|
||||||
|
7.82227 0.001 0 0 0 0.001 0 0
|
||||||
|
7.8418 0.001 0 0 0 0.001 0 0
|
||||||
|
7.86133 0.001 0 0 0 0.001 0 0
|
||||||
|
7.88086 0.001 0 0 0 0.001 0 0
|
||||||
|
7.90039 0.001 0 0 0 0.001 0 0
|
||||||
|
7.91992 0.001 0 0 0 0.001 0 0
|
||||||
|
7.93945 0.001 0 0 0 0.001 0 0
|
||||||
|
7.95898 0.001 0 0 0 0.001 0 0
|
||||||
|
7.97852 0.001 0 0 0 0.001 0 0
|
||||||
|
7.99805 0.001 0 0 0 0.001 0 0
|
||||||
|
8.01758 0.001 0 0 0 0.001 0 0
|
||||||
|
8.03711 0.001 0 0 0 0.001 0 0
|
||||||
|
8.05664 0.001 0 0 0 0.001 0 0
|
||||||
|
8.07617 0.001 0 0 0 0.001 0 0
|
||||||
|
8.0957 0.001 0 0 0 0.001 0 0
|
||||||
|
8.11523 0.001 0 0 0 0.001 0 0
|
||||||
|
8.13477 0.001 0 0 0 0.001 0 0
|
||||||
|
8.1543 0.001 0 0 0 0.001 0 0
|
||||||
|
8.17383 0.001 0 0 0 0.001 0 0
|
||||||
|
8.19336 0.001 0 0 0 0.001 0 0
|
||||||
|
8.21289 0.001 0 0 0 0.001 0 0
|
||||||
|
8.23242 0.001 0 0 0 0.001 0 0
|
||||||
|
8.25195 0.001 0 0 0 0.001 0 0
|
||||||
|
8.27148 0.001 0 0 0 0.001 0 0
|
||||||
|
8.29102 0.001 0 0 0 0.001 0 0
|
||||||
|
8.31055 0.001 0 0 0 0.001 0 0
|
||||||
|
8.33008 0.001 0 0 0 0.001 0 0
|
||||||
|
8.34961 0.001 0 0 0 0.001 0 0
|
||||||
|
8.36914 0.001 0 0 0 0.001 0 0
|
||||||
|
8.38867 0.001 0 0 0 0.001 0 0
|
||||||
|
8.4082 0.001 0 0 0 0.001 0 0
|
||||||
|
8.42773 0.001 0 0 0 0.001 0 0
|
||||||
|
8.44727 0.001 0 0 0 0.001 0 0
|
||||||
|
8.4668 0.001 0 0 0 0.001 0 0
|
||||||
|
8.48633 0.001 0 0 0 0.001 0 0
|
||||||
|
8.50586 0.001 0 0 0 0.001 0 0
|
||||||
|
8.52539 0.001 0 0 0 0.001 0 0
|
||||||
|
8.54492 0.001 0 0 0 0.001 0 0
|
||||||
|
8.56445 0.001 0 0 0 0.001 0 0
|
||||||
|
8.58398 0.001 0 0 0 0.001 0 0
|
||||||
|
8.60352 0.001 0 0 0 0.001 0 0
|
||||||
|
8.62305 0.001 0 0 0 0.001 0 0
|
||||||
|
8.64258 0.001 0 0 0 0.001 0 0
|
||||||
|
8.66211 0.001 0 0 0 0.001 0 0
|
||||||
|
8.68164 0.001 0 0 0 0.001 0 0
|
||||||
|
8.70117 0.001 0 0 0 0.001 0 0
|
||||||
|
8.7207 0.001 0 0 0 0.001 0 0
|
||||||
|
8.74023 0.001 0 0 0 0.001 0 0
|
||||||
|
8.75977 0.001 0 0 0 0.001 0 0
|
||||||
|
8.7793 0.001 0 0 0 0.001 0 0
|
||||||
|
8.79883 0.001 0 0 0 0.001 0 0
|
||||||
|
8.81836 0.001 0 0 0 0.001 0 0
|
||||||
|
8.83789 0.001 0 0 0 0.001 0 0
|
||||||
|
8.85742 0.001 0 0 0 0.001 0 0
|
||||||
|
8.87695 0.001 0 0 0 0.001 0 0
|
||||||
|
8.89648 0.001 0 0 0 0.001 0 0
|
||||||
|
8.91602 0.001 0 0 0 0.001 0 0
|
||||||
|
8.93555 0.001 0 0 0 0.001 0 0
|
||||||
|
8.95508 0.001 0 0 0 0.001 0 0
|
||||||
|
8.97461 0.001 0 0 0 0.001 0 0
|
||||||
|
8.99414 0.001 0 0 0 0.001 0 0
|
||||||
|
9.01367 0.001 0 0 0 0.001 0 0
|
||||||
|
9.0332 0.001 0 0 0 0.001 0 0
|
||||||
|
9.05273 0.001 0 0 0 0.001 0 0
|
||||||
|
9.07227 0.001 0 0 0 0.001 0 0
|
||||||
|
9.0918 0.001 0 0 0 0.001 0 0
|
||||||
|
9.11133 0.001 0 0 0 0.001 0 0
|
||||||
|
9.13086 0.001 0 0 0 0.001 0 0
|
||||||
|
9.15039 0.001 0 0 0 0.001 0 0
|
||||||
|
9.16992 0.001 0 0 0 0.001 0 0
|
||||||
|
9.18945 0.001 0 0 0 0.001 0 0
|
||||||
|
9.20898 0.001 0 0 0 0.001 0 0
|
||||||
|
9.22852 0.001 0 0 0 0.001 0 0
|
||||||
|
9.24805 0.001 0 0 0 0.001 0 0
|
||||||
|
9.26758 0.001 0 0 0 0.001 0 0
|
||||||
|
9.28711 0.001 0 0 0 0.001 0 0
|
||||||
|
9.30664 0.001 0 0 0 0.001 0 0
|
||||||
|
9.32617 0.001 0 0 0 0.001 0 0
|
||||||
|
9.3457 0.001 0 0 0 0.001 0 0
|
||||||
|
9.36523 0.001 0 0 0 0.001 0 0
|
||||||
|
9.38477 0.001 0 0 0 0.001 0 0
|
||||||
|
9.4043 0.001 0 0 0 0.001 0 0
|
||||||
|
9.42383 0.001 0 0 0 0.001 0 0
|
||||||
|
9.44336 0.001 0 0 0 0.001 0 0
|
||||||
|
9.46289 0.001 0 0 0 0.001 0 0
|
||||||
|
9.48242 0.001 0 0 0 0.001 0 0
|
||||||
|
9.50195 0.001 0 0 0 0.001 0 0
|
||||||
|
9.52148 0.001 0 0 0 0.001 0 0
|
||||||
|
9.54102 0.001 0 0 0 0.001 0 0
|
||||||
|
9.56055 0.001 0 0 0 0.001 0 0
|
||||||
|
9.58008 0.001 0 0 0 0.001 0 0
|
||||||
|
9.59961 0.001 0 0 0 0.001 0 0
|
||||||
|
9.61914 0.001 0 0 0 0.001 0 0
|
||||||
|
9.63867 0.001 0 0 0 0.001 0 0
|
||||||
|
9.6582 0.001 0 0 0 0.001 0 0
|
||||||
|
9.67773 0.001 0 0 0 0.001 0 0
|
||||||
|
9.69727 0.001 0 0 0 0.001 0 0
|
||||||
|
9.7168 0.001 0 0 0 0.001 0 0
|
||||||
|
9.73633 0.001 0 0 0 0.001 0 0
|
||||||
|
9.75586 0.001 0 0 0 0.001 0 0
|
||||||
|
9.77539 0.001 0 0 0 0.001 0 0
|
||||||
|
9.79492 0.001 0 0 0 0.001 0 0
|
||||||
|
9.81445 0.001 0 0 0 0.001 0 0
|
||||||
|
9.83398 0.001 0 0 0 0.001 0 0
|
||||||
|
9.85352 0.001 0 0 0 0.001 0 0
|
||||||
|
9.87305 0.001 0 0 0 0.001 0 0
|
||||||
|
9.89258 0.001 0 0 0 0.001 0 0
|
||||||
|
9.91211 0.001 0 0 0 0.001 0 0
|
||||||
|
9.93164 0.001 0 0 0 0.001 0 0
|
||||||
|
9.95117 0.001 0 0 0 0.001 0 0
|
||||||
|
9.9707 0.001 0 0 0 0.001 0 0
|
||||||
|
9.99023 0.001 0 0 0 0.001 0 0
|
Can't render this file because it has a wrong number of fields in line 18.
|
Loading…
x
Reference in New Issue
Block a user