Added superclass

This commit is contained in:
André R. Brodtkorb 2018-08-09 15:14:50 +02:00
parent 8863f654be
commit 4da6fd043d
12 changed files with 792 additions and 4296 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -21,13 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
""" """
#Import packages we need #Import packages we need
import numpy as np from SWESimulators import Simulator
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
from SWESimulators import Common
@ -42,7 +36,7 @@ from SWESimulators import Common
""" """
Class that solves the SW equations Class that solves the SW equations
""" """
class FORCE: class FORCE (Simulator.BaseSimulator):
""" """
Initialization routine Initialization routine
@ -63,80 +57,39 @@ class FORCE:
dx, dy, dt, \ dx, dy, dt, \
g, \ g, \
block_width=16, block_height=16): block_width=16, block_height=16):
#Create a CUDA stream
self.stream = cuda.Stream() # Call super constructor
super().__init__(context, \
h0, hu0, hv0, \
nx, ny, \
1, 1, \
dx, dy, dt, \
g, \
block_width, block_height);
#Get kernels #Get kernels
self.force_module = context.get_kernel("FORCE_kernel.cu", block_width, block_height) self.module = context.get_kernel("FORCE_kernel.cu", block_width, block_height)
self.force_kernel = self.force_module.get_function("FORCEKernel") self.kernel = self.module.get_function("FORCEKernel")
self.force_kernel.prepare("iiffffPiPiPiPiPiPi") self.kernel.prepare("iiffffPiPiPiPiPiPi")
#Create data by uploading to device
ghost_cells_x = 1
ghost_cells_y = 1
self.data = Common.SWEDataArakawaA(self.stream, \
nx, ny, \
ghost_cells_x, ghost_cells_y, \
h0, hu0, hv0)
#Save input parameters
#Notice that we need to specify them in the correct dataformat for the
#OpenCL kernel
self.nx = np.int32(nx)
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.g = np.float32(g)
#Initialize time
self.t = np.float32(0.0)
#Compute kernel launch parameters
self.local_size = (block_width, block_height, 1)
self.global_size = ( \
int(np.ceil(self.nx / float(self.local_size[0]))), \
int(np.ceil(self.ny / float(self.local_size[1]))) \
)
def __str__(self): def __str__(self):
return "First order centered" return "First order centered"
"""
Function which steps n timesteps
"""
def step(self, t_end=0.0):
n = int(t_end / self.dt + 1)
for i in range(0, n): def simulate(self, t_end):
local_dt = np.float32(min(self.dt, t_end-i*self.dt)) return super().simulateEuler(t_end)
if (local_dt <= 0.0):
break
self.force_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \ def stepEuler(self, dt):
self.nx, self.ny, \ self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.dx, self.dy, local_dt, \ self.nx, self.ny, \
self.g, \ self.dx, self.dy, dt, \
self.data.h0.data.gpudata, self.data.h0.pitch, \ self.g, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \ self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \ self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \ self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \ self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch) self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.t += local_dt self.data.swap()
self.t += dt
self.data.swap()
return self.t, n
def download(self):
return self.data.download(self.stream)

View File

@ -20,13 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
""" """
#Import packages we need #Import packages we need
import numpy as np from SWESimulators import Simulator
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
from SWESimulators import Common
@ -37,7 +31,7 @@ from SWESimulators import Common
""" """
Class that solves the SW equations using the Harten-Lax -van Leer approximate Riemann solver Class that solves the SW equations using the Harten-Lax -van Leer approximate Riemann solver
""" """
class HLL: class HLL (Simulator.BaseSimulator):
""" """
Initialization routine Initialization routine
@ -58,80 +52,39 @@ class HLL:
dx, dy, dt, \ dx, dy, dt, \
g, \ g, \
block_width=16, block_height=16): block_width=16, block_height=16):
#Create a CUDA stream
self.stream = cuda.Stream() # Call super constructor
super().__init__(context, \
h0, hu0, hv0, \
nx, ny, \
1, 1, \
dx, dy, dt, \
g, \
block_width, block_height);
#Get kernels #Get kernels
self.hll_module = context.get_kernel("HLL_kernel.cu", block_width, block_height) self.module = context.get_kernel("HLL_kernel.cu", block_width, block_height)
self.hll_kernel = self.hll_module.get_function("HLLKernel") self.kernel = self.module.get_function("HLLKernel")
self.hll_kernel.prepare("iiffffPiPiPiPiPiPi") self.kernel.prepare("iiffffPiPiPiPiPiPi")
#Create data by uploading to device
ghost_cells_x = 1
ghost_cells_y = 1
self.data = Common.SWEDataArakawaA(self.stream, \
nx, ny, \
ghost_cells_x, ghost_cells_y, \
h0, hu0, hv0)
#Save input parameters
#Notice that we need to specify them in the correct dataformat for the
#OpenCL kernel
self.nx = np.int32(nx)
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.g = np.float32(g)
#Initialize time
self.t = np.float32(0.0)
#Compute kernel launch parameters
self.local_size = (block_width, block_height, 1)
self.global_size = ( \
int(np.ceil(self.nx / float(self.local_size[0]))), \
int(np.ceil(self.ny / float(self.local_size[1]))) \
)
def __str__(self): def __str__(self):
return "Harten-Lax-van Leer" return "Harten-Lax-van Leer"
def simulate(self, t_end):
""" return super().simulateEuler(t_end)
Function which steps n timesteps
"""
def step(self, t_end=0.0):
n = int(t_end / self.dt + 1)
for i in range(0, n): def stepEuler(self, dt):
local_dt = np.float32(min(self.dt, t_end-i*self.dt)) self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
if (local_dt <= 0.0): self.dx, self.dy, dt, \
break self.g, \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap()
self.t += dt
self.hll_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, local_dt, \
self.g, \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.t += local_dt
self.data.swap()
return self.t, n
def download(self):
return self.data.download(self.stream)

View File

@ -21,12 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#Import packages we need #Import packages we need
import numpy as np import numpy as np
from SWESimulators import Simulator
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
from SWESimulators import Common
@ -39,7 +34,7 @@ from SWESimulators import Common
""" """
Class that solves the SW equations using the Forward-Backward linear scheme Class that solves the SW equations using the Forward-Backward linear scheme
""" """
class HLL2: class HLL2 (Simulator.BaseSimulator):
""" """
Initialization routine Initialization routine
@ -61,99 +56,61 @@ class HLL2:
g, \ g, \
theta=1.8, \ theta=1.8, \
block_width=16, block_height=16): block_width=16, block_height=16):
#Create a CUDA stream
self.stream = cuda.Stream() # Call super constructor
super().__init__(context, \
h0, hu0, hv0, \
nx, ny, \
2, 2, \
dx, dy, dt, \
g, \
block_width, block_height);
self.theta = np.float32(theta)
#Get kernels #Get kernels
self.hll2_module = context.get_kernel("HLL2_kernel.cu", block_width, block_height) self.module = context.get_kernel("HLL2_kernel.cu", block_width, block_height)
self.hll2_kernel = self.hll2_module.get_function("HLL2Kernel") self.kernel = self.module.get_function("HLL2Kernel")
self.hll2_kernel.prepare("iifffffiPiPiPiPiPiPi") self.kernel.prepare("iifffffiPiPiPiPiPiPi")
#Create data by uploading to device
ghost_cells_x = 2
ghost_cells_y = 2
self.data = Common.SWEDataArakawaA(self.stream, \
nx, ny, \
ghost_cells_x, ghost_cells_y, \
h0, hu0, hv0)
#Save input parameters
#Notice that we need to specify them in the correct dataformat for the
#OpenCL kernel
self.nx = np.int32(nx)
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.g = np.float32(g)
self.theta = np.float32(theta)
#Initialize time
self.t = np.float32(0.0)
#Compute kernel launch parameters
self.local_size = (block_width, block_height, 1)
self.global_size = ( \
int(np.ceil(self.nx / float(self.local_size[0]))), \
int(np.ceil(self.ny / float(self.local_size[1]))) \
)
def __str__(self): def __str__(self):
return "Harten-Lax-van Leer (2nd order)" return "Harten-Lax-van Leer (2nd order)"
def simulate(self, t_end):
""" return super().simulateDimsplit(t_end)
Function which steps n timesteps
"""
def step(self, t_end=0.0):
n = int(t_end / (2.0*self.dt) + 1)
for i in range(0, n): def stepEuler(self, dt):
#Dimensional splitting: second order accurate for every other timestep, return self.stepDimsplitXY(dt)
#thus run two timesteps in a go
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
if (local_dt <= 0.0):
break
#Along X, then Y def stepDimsplitXY(self, dt):
self.hll2_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \ self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \ self.nx, self.ny, \
self.dx, self.dy, local_dt, \ self.dx, self.dy, dt, \
self.g, \ self.g, \
self.theta, \ self.theta, \
np.int32(0), \ np.int32(0), \
self.data.h0.data.gpudata, self.data.h0.pitch, \ self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \ self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \ self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \ self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \ self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch) self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap() self.data.swap()
self.t += dt
#Along Y, then X def stepDimsplitYX(self, dt):
self.hll2_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \ self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \ self.nx, self.ny, \
self.dx, self.dy, local_dt, \ self.dx, self.dy, dt, \
self.g, \ self.g, \
self.theta, \ self.theta, \
np.int32(1), \ np.int32(1), \
self.data.h0.data.gpudata, self.data.h0.pitch, \ self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \ self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \ self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \ self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \ self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch) self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap() self.data.swap()
self.t += dt
self.t += local_dt
return self.t, 2*n
def download(self):
return self.data.download(self.stream)

View File

@ -26,12 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#Import packages we need #Import packages we need
import numpy as np import numpy as np
from SWESimulators import Simulator
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
from SWESimulators import Common
@ -40,7 +35,7 @@ from SWESimulators import Common
""" """
Class that solves the SW equations using the Forward-Backward linear scheme Class that solves the SW equations using the Forward-Backward linear scheme
""" """
class KP07: class KP07 (Simulator.BaseSimulator):
""" """
Initialization routine Initialization routine
@ -53,131 +48,66 @@ class KP07:
dy: Grid cell spacing along y-axis (20 000 m) dy: Grid cell spacing along y-axis (20 000 m)
dt: Size of each timestep (90 s) dt: Size of each timestep (90 s)
g: Gravitational accelleration (9.81 m/s^2) g: Gravitational accelleration (9.81 m/s^2)
f: Coriolis parameter (1.2e-4 s^1)
r: Bottom friction coefficient (2.4e-3 m/s) r: Bottom friction coefficient (2.4e-3 m/s)
wind_type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
wind_tau0: Amplitude of wind stress (Pa)
wind_rho: Density of sea water (1025.0 kg / m^3)
wind_alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
wind_xm: Maximum wind stress for bell shaped wind stress
wind_Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
wind_x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
wind_y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
wind_u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
wind_v0: Translation speed along y for moving cyclone (-0.5*u0)
""" """
def __init__(self, \ def __init__(self, \
context, \ context, \
h0, hu0, hv0, \ h0, hu0, hv0, \
nx, ny, \ nx, ny, \
dx, dy, dt, \ dx, dy, dt, \
g, theta=1.3, \ g, \
r=0.0, use_rk2=True, theta=1.3, r=0.0, \
block_width=16, block_height=16): block_width=16, block_height=16):
#Create a CUDA stream
self.stream = cuda.Stream() # Call super constructor
super().__init__(context, \
#Get kernels h0, hu0, hv0, \
self.kp07_module = context.get_kernel("KP07_kernel.cu", block_width, block_height) nx, ny, \
self.kp07_kernel = self.kp07_module.get_function("KP07Kernel") 2, 2, \
self.kp07_kernel.prepare("iiffffffiPiPiPiPiPiPi") dx, dy, dt, \
g, \
#Create data by uploading to device block_width, block_height);
ghost_cells_x = 2
ghost_cells_y = 2
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
#Save input parameters
#Notice that we need to specify them in the correct dataformat for the
#OpenCL kernel
self.nx = np.int32(nx)
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.g = np.float32(g)
self.theta = np.float32(theta) self.theta = np.float32(theta)
self.r = np.float32(r) self.r = np.float32(r)
self.use_rk2 = use_rk2
#Get kernels
self.module = context.get_kernel("KP07_kernel.cu", block_width, block_height)
self.kernel = self.module.get_function("KP07Kernel")
self.kernel.prepare("iiffffffiPiPiPiPiPiPi")
#Initialize time
self.t = np.float32(0.0)
#Compute kernel launch parameters
self.local_size = (block_width, block_height, 1)
self.global_size = ( \
int(np.ceil(self.nx / float(self.local_size[0]))), \
int(np.ceil(self.ny / float(self.local_size[1]))) \
)
def __str__(self): def __str__(self):
return "Kurganov-Petrova 2007" return "Kurganov-Petrova 2007"
""" def simulate(self, t_end):
Function which steps n timesteps return super().simulateRK(t_end, 2)
"""
def step(self, t_end=0.0):
n = int(t_end / self.dt + 1)
for i in range(0, n): def substepRK(self, dt, substep):
local_dt = np.float32(min(self.dt, t_end-i*self.dt)) self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
if (local_dt <= 0.0): self.dx, self.dy, dt, \
break self.g, \
self.theta, \
self.r, \
np.int32(substep), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap()
if (self.use_rk2): def stepEuler(self, dt):
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \ self.substepRK(dt, 0)
self.nx, self.ny, \ self.t += dt
self.dx, self.dy, local_dt, \
self.g, \
self.theta, \
self.r, \
np.int32(0), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, local_dt, \
self.g, \
self.theta, \
self.r, \
np.int32(1), \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch, \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch)
else:
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, local_dt, \
self.g, \
self.theta, \
self.r, \
np.int32(0), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.cl_data.swap()
self.t += local_dt
return self.t, n def stepRK(self, dt, order):
if (order != 2):
raise NotImplementedError("Only second order implemented")
self.substepRK(dt, 0)
self.substepRK(dt, 1)
self.t += dt
def download(self): def download(self):
return self.data.download(self.stream) return self.data.download(self.stream)

View File

@ -26,12 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#Import packages we need #Import packages we need
import numpy as np import numpy as np
from SWESimulators import Simulator
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
from SWESimulators import Common
@ -40,7 +35,7 @@ from SWESimulators import Common
""" """
Class that solves the SW equations using the dimentionally split KP07 scheme Class that solves the SW equations using the dimentionally split KP07 scheme
""" """
class KP07_dimsplit: class KP07_dimsplit (Simulator.BaseSimulator):
""" """
Initialization routine Initialization routine
@ -62,98 +57,62 @@ class KP07_dimsplit:
g, \ g, \
theta=1.3, \ theta=1.3, \
block_width=16, block_height=16): block_width=16, block_height=16):
#Create a CUDA stream
self.stream = cuda.Stream() # Call super constructor
super().__init__(context, \
h0, hu0, hv0, \
nx, ny, \
2, 2, \
dx, dy, dt, \
g, \
block_width, block_height);
self.theta = np.float32(theta)
#Get kernels #Get kernels
self.kp07_dimsplit_module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height) self.module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height)
self.kp07_dimsplit_kernel = self.kp07_dimsplit_module.get_function("KP07DimsplitKernel") self.kernel = self.module.get_function("KP07DimsplitKernel")
self.kp07_dimsplit_kernel.prepare("iifffffiPiPiPiPiPiPi") self.kernel.prepare("iifffffiPiPiPiPiPiPi")
#Create data by uploading to device
ghost_cells_x = 2
ghost_cells_y = 2
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
#Save input parameters
#Notice that we need to specify them in the correct dataformat for the
#OpenCL kernel
self.nx = np.int32(nx)
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.g = np.float32(g)
self.theta = np.float32(theta)
#Initialize time
self.t = np.float32(0.0)
#Compute kernel launch parameters
self.local_size = (block_width, block_height, 1)
self.global_size = ( \
int(np.ceil(self.nx / float(self.local_size[0]))), \
int(np.ceil(self.ny / float(self.local_size[1]))) \
)
def __str__(self): def __str__(self):
return "Kurganov-Petrova 2007 dimensionally split" return "Kurganov-Petrova 2007 dimensionally split"
def simulate(self, t_end):
return super().simulateDimsplit(t_end)
""" def stepEuler(self, dt):
Function which steps n timesteps return self.stepDimsplitXY(dt)
"""
def step(self, t_end=0.0): def stepDimsplitXY(self, dt):
n = int(t_end / (2.0*self.dt) + 1) self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, dt, \
self.g, \
self.theta, \
np.int32(0), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap()
self.t += dt
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, dt, \
self.g, \
self.theta, \
np.int32(1), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap()
self.t += dt
for i in range(0, n):
#Dimensional splitting: second order accurate for every other timestep,
#thus run two timesteps in a go
#Compute timestep
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
if (local_dt <= 0.0):
break
#Along X, then Y
self.kp07_dimsplit_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, local_dt, \
self.g, \
self.theta, \
np.int32(0), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap()
#Along Y, then X
self.kp07_dimsplit_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, local_dt, \
self.g, \
self.theta, \
np.int32(1), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap()
self.t += 2.0*local_dt
return self.t, 2*n
def download(self):
return self.data.download(self.stream)

View File

@ -21,13 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
""" """
#Import packages we need #Import packages we need
import numpy as np from SWESimulators import Simulator
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
from SWESimulators import Common
@ -38,7 +32,7 @@ from SWESimulators import Common
""" """
Class that solves the SW equations using the Lax Friedrichs scheme Class that solves the SW equations using the Lax Friedrichs scheme
""" """
class LxF: class LxF (Simulator.BaseSimulator):
""" """
Initialization routine Initialization routine
@ -59,80 +53,39 @@ class LxF:
dx, dy, dt, \ dx, dy, dt, \
g, \ g, \
block_width=16, block_height=16): block_width=16, block_height=16):
#Create a CUDA stream
self.stream = cuda.Stream() # Call super constructor
super().__init__(context, \
h0, hu0, hv0, \
nx, ny, \
1, 1, \
dx, dy, dt, \
g, \
block_width, block_height);
#Get kernels # Get kernels
self.lxf_module = context.get_kernel("LxF_kernel.cu", block_width, block_height) self.module = context.get_kernel("LxF_kernel.cu", block_width, block_height)
self.lxf_kernel = self.lxf_module.get_function("LxFKernel") self.kernel = self.module.get_function("LxFKernel")
self.lxf_kernel.prepare("iiffffPiPiPiPiPiPi") self.kernel.prepare("iiffffPiPiPiPiPiPi")
#Create data by uploading to device
ghost_cells_x = 1
ghost_cells_y = 1
self.data = Common.SWEDataArakawaA(self.stream, \
nx, ny, \
ghost_cells_x, ghost_cells_y, \
h0, hu0, hv0)
#Save input parameters
#Notice that we need to specify them in the correct dataformat for the
#OpenCL kernel
self.nx = np.int32(nx)
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.g = np.float32(g)
#Initialize time
self.t = np.float32(0.0)
#Compute kernel launch parameters
self.local_size = (block_width, block_height, 1)
self.global_size = ( \
int(np.ceil(self.nx / float(self.local_size[0]))), \
int(np.ceil(self.ny / float(self.local_size[1]))) \
)
def __str__(self): def __str__(self):
return "Lax Friedrichs" return "Lax Friedrichs"
"""
Function which steps n timesteps
"""
def step(self, t_end=0.0):
n = int(t_end / self.dt + 1)
for i in range(0, n): def simulate(self, t_end):
local_dt = np.float32(min(self.dt, t_end-i*self.dt)) return super().simulateEuler(t_end)
if (local_dt <= 0.0):
break
self.lxf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \ def stepEuler(self, dt):
self.nx, self.ny, \ self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.dx, self.dy, local_dt, \ self.nx, self.ny, \
self.g, \ self.dx, self.dy, dt, \
self.data.h0.data.gpudata, self.data.h0.pitch, \ self.g, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \ self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \ self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \ self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \ self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch) self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.t += local_dt self.data.swap()
self.t += dt
self.data.swap()
return self.t, n
def download(self):
return self.data.download(self.stream)

179
SWESimulators/Simulator.py Normal file
View File

@ -0,0 +1,179 @@
# -*- coding: utf-8 -*-
"""
This python module implements the classical Lax-Friedrichs numerical
scheme for the shallow water equations
Copyright (C) 2016 SINTEF ICT
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
#Import packages we need
import numpy as np
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
from SWESimulators import Common
class BaseSimulator:
"""
Initialization routine
context: GPU context to use
kernel_wrapper: wrapper function of GPU kernel
h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
nx: Number of cells along x-axis
ny: Number of cells along y-axis
dx: Grid cell spacing along x-axis (20 000 m)
dy: Grid cell spacing along y-axis (20 000 m)
dt: Size of each timestep (90 s)
g: Gravitational accelleration (9.81 m/s^2)
"""
def __init__(self, \
context, \
h0, hu0, hv0, \
nx, ny, \
ghost_cells_x, ghost_cells_y, \
dx, dy, dt, \
g, \
block_width, block_height):
#Create a CUDA stream
self.stream = cuda.Stream()
#Create data by uploading to device
self.data = Common.SWEDataArakawaA(self.stream, \
nx, ny, \
ghost_cells_x, ghost_cells_y, \
h0, hu0, hv0)
#Save input parameters
#Notice that we need to specify them in the correct dataformat for the
#GPU kernel
self.nx = np.int32(nx)
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.g = np.float32(g)
#Keep track of simulation time
self.t = 0.0;
#Compute kernel launch parameters
self.local_size = (block_width, block_height, 1)
self.global_size = ( \
int(np.ceil(self.nx / float(self.local_size[0]))), \
int(np.ceil(self.ny / float(self.local_size[1]))) \
)
"""
Function which simulates forward in time using the default simulation type
"""
def simulate(self, t_end):
raise(exceptions.NotImplementedError("Needs to be implemented in subclass"))
"""
Function which simulates t_end seconds using forward Euler
Requires that the stepEuler functionality is implemented in the subclasses
"""
def simulateEuler(self, t_end):
# Compute number of timesteps to perform
n = int(t_end / self.dt + 1)
for i in range(0, n):
# Compute timestep for "this" iteration
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
# Stop if end reached (should not happen)
if (local_dt <= 0.0):
break
# Step with forward Euler
self.stepEuler(local_dt)
return self.t, n
"""
Function which simulates t_end seconds using Runge-Kutta 2
Requires that the stepRK functionality is implemented in the subclasses
"""
def simulateRK(self, t_end, order):
# Compute number of timesteps to perform
n = int(t_end / self.dt + 1)
for i in range(0, n):
# Compute timestep for "this" iteration
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
# Stop if end reached (should not happen)
if (local_dt <= 0.0):
break
# Perform all the Runge-Kutta substeps
self.stepRK(local_dt, order)
return self.t, n
"""
Function which simulates t_end seconds using second order dimensional splitting (XYYX)
Requires that the stepDimsplitX and stepDimsplitY functionality is implemented in the subclasses
"""
def simulateDimsplit(self, t_end):
# Compute number of timesteps to perform
n = int(t_end / (2.0*self.dt) + 1)
for i in range(0, n):
# Compute timestep for "this" iteration
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
# Stop if end reached (should not happen)
if (local_dt <= 0.0):
break
# Perform the dimensional split substeps
self.stepDimsplitXY(local_dt)
self.stepDimsplitYX(local_dt)
return self.t, 2*n
"""
Function which performs one single timestep of size dt using forward euler
"""
def stepEuler(self, dt):
raise(NotImplementedError("Needs to be implemented in subclass"))
def stepRK(self, dt, substep):
raise(NotImplementedError("Needs to be implemented in subclass"))
def stepDimsplitXY(self, dt):
raise(NotImplementedError("Needs to be implemented in subclass"))
def stepDimsplitYX(self, dt):
raise(NotImplementedError("Needs to be implemented in subclass"))
def sim_time(self):
return self.t
def download(self):
return self.data.download(self.stream)

View File

@ -22,12 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#Import packages we need #Import packages we need
import numpy as np import numpy as np
from SWESimulators import Simulator
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
from SWESimulators import Common
@ -36,7 +31,7 @@ from SWESimulators import Common
""" """
Class that solves the SW equations using the Forward-Backward linear scheme Class that solves the SW equations using the Forward-Backward linear scheme
""" """
class WAF: class WAF (Simulator.BaseSimulator):
""" """
Initialization routine Initialization routine
@ -57,92 +52,56 @@ class WAF:
dx, dy, dt, \ dx, dy, dt, \
g, \ g, \
block_width=16, block_height=16): block_width=16, block_height=16):
#Create a CUDA stream
self.stream = cuda.Stream() # Call super constructor
super().__init__(context, \
h0, hu0, hv0, \
nx, ny, \
2, 2, \
dx, dy, dt, \
g, \
block_width, block_height);
#Get kernels #Get kernels
self.waf_module = context.get_kernel("WAF_kernel.cu", block_width, block_height) self.module = context.get_kernel("WAF_kernel.cu", block_width, block_height)
self.waf_kernel = self.waf_module.get_function("WAFKernel") self.kernel = self.module.get_function("WAFKernel")
self.waf_kernel.prepare("iiffffiPiPiPiPiPiPi") self.kernel.prepare("iiffffiPiPiPiPiPiPi")
#Create data by uploading to device
ghost_cells_x = 2
ghost_cells_y = 2
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
#Save input parameters
#Notice that we need to specify them in the correct dataformat for the
#OpenCL kernel
self.nx = np.int32(nx)
self.ny = np.int32(ny)
self.dx = np.float32(dx)
self.dy = np.float32(dy)
self.dt = np.float32(dt)
self.g = np.float32(g)
#Initialize time
self.t = np.float32(0.0)
#Compute kernel launch parameters
self.local_size = (block_width, block_height, 1)
self.global_size = ( \
int(np.ceil(self.nx / float(self.local_size[0]))), \
int(np.ceil(self.ny / float(self.local_size[1]))) \
)
def __str__(self): def __str__(self):
return "Weighted average flux" return "Weighted average flux"
""" def simulate(self, t_end):
Function which steps n timesteps return super().simulateDimsplit(t_end)
"""
def step(self, t_end=0.0):
n = int(t_end / (2.0*self.dt) + 1)
for i in range(0, n): def stepEuler(self, dt):
#Dimensional splitting: second order accurate for every other timestep, return self.stepDimsplitXY(dt)
#thus run two timesteps in a go
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
if (local_dt <= 0.0):
break
#Along X, then Y
self.waf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, local_dt, \
self.g, \
np.int32(0), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
#Along Y, then X
self.waf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, local_dt, \
self.g, \
np.int32(1), \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch, \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch)
self.t += local_dt
return self.t, 2*n def stepDimsplitXY(self, dt):
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, dt, \
self.g, \
def download(self): np.int32(0), \
return self.data.download(self.stream) self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap()
self.t += dt
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
self.nx, self.ny, \
self.dx, self.dy, dt, \
self.g, \
np.int32(1), \
self.data.h0.data.gpudata, self.data.h0.pitch, \
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
self.data.h1.data.gpudata, self.data.h1.pitch, \
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
self.data.hv1.data.gpudata, self.data.hv1.pitch)
self.data.swap()
self.t += dt