mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
Added superclass
This commit is contained in:
parent
8863f654be
commit
4da6fd043d
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -21,13 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from SWESimulators import Common
|
||||
from SWESimulators import Simulator
|
||||
|
||||
|
||||
|
||||
@ -42,7 +36,7 @@ from SWESimulators import Common
|
||||
"""
|
||||
Class that solves the SW equations
|
||||
"""
|
||||
class FORCE:
|
||||
class FORCE (Simulator.BaseSimulator):
|
||||
|
||||
"""
|
||||
Initialization routine
|
||||
@ -63,62 +57,31 @@ class FORCE:
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width=16, block_height=16):
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
|
||||
#Get kernels
|
||||
self.force_module = context.get_kernel("FORCE_kernel.cu", block_width, block_height)
|
||||
self.force_kernel = self.force_module.get_function("FORCEKernel")
|
||||
self.force_kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||
|
||||
#Create data by uploading to device
|
||||
ghost_cells_x = 1
|
||||
ghost_cells_y = 1
|
||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#OpenCL kernel
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.dt = np.float32(dt)
|
||||
self.g = np.float32(g)
|
||||
|
||||
#Initialize time
|
||||
self.t = np.float32(0.0)
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.local_size = (block_width, block_height, 1)
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
|
||||
self.module = context.get_kernel("FORCE_kernel.cu", block_width, block_height)
|
||||
self.kernel = self.module.get_function("FORCEKernel")
|
||||
self.kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||
|
||||
def __str__(self):
|
||||
return "First order centered"
|
||||
|
||||
def simulate(self, t_end):
|
||||
return super().simulateEuler(t_end)
|
||||
|
||||
"""
|
||||
Function which steps n timesteps
|
||||
"""
|
||||
def step(self, t_end=0.0):
|
||||
n = int(t_end / self.dt + 1)
|
||||
|
||||
for i in range(0, n):
|
||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
self.force_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepEuler(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||
@ -126,17 +89,7 @@ class FORCE:
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
|
||||
self.t += local_dt
|
||||
|
||||
self.data.swap()
|
||||
|
||||
return self.t, n
|
||||
self.t += dt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
||||
|
@ -20,13 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from SWESimulators import Common
|
||||
from SWESimulators import Simulator
|
||||
|
||||
|
||||
|
||||
@ -37,7 +31,7 @@ from SWESimulators import Common
|
||||
"""
|
||||
Class that solves the SW equations using the Harten-Lax -van Leer approximate Riemann solver
|
||||
"""
|
||||
class HLL:
|
||||
class HLL (Simulator.BaseSimulator):
|
||||
|
||||
"""
|
||||
Initialization routine
|
||||
@ -58,62 +52,31 @@ class HLL:
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width=16, block_height=16):
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
|
||||
#Get kernels
|
||||
self.hll_module = context.get_kernel("HLL_kernel.cu", block_width, block_height)
|
||||
self.hll_kernel = self.hll_module.get_function("HLLKernel")
|
||||
self.hll_kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||
|
||||
#Create data by uploading to device
|
||||
ghost_cells_x = 1
|
||||
ghost_cells_y = 1
|
||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#OpenCL kernel
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.dt = np.float32(dt)
|
||||
self.g = np.float32(g)
|
||||
|
||||
#Initialize time
|
||||
self.t = np.float32(0.0)
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.local_size = (block_width, block_height, 1)
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
|
||||
self.module = context.get_kernel("HLL_kernel.cu", block_width, block_height)
|
||||
self.kernel = self.module.get_function("HLLKernel")
|
||||
self.kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||
|
||||
def __str__(self):
|
||||
return "Harten-Lax-van Leer"
|
||||
|
||||
def simulate(self, t_end):
|
||||
return super().simulateEuler(t_end)
|
||||
|
||||
"""
|
||||
Function which steps n timesteps
|
||||
"""
|
||||
def step(self, t_end=0.0):
|
||||
n = int(t_end / self.dt + 1)
|
||||
|
||||
for i in range(0, n):
|
||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
self.hll_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepEuler(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||
@ -121,17 +84,7 @@ class HLL:
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
|
||||
self.t += local_dt
|
||||
|
||||
self.data.swap()
|
||||
|
||||
return self.t, n
|
||||
self.t += dt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
||||
|
@ -21,12 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from SWESimulators import Common
|
||||
from SWESimulators import Simulator
|
||||
|
||||
|
||||
|
||||
@ -39,7 +34,7 @@ from SWESimulators import Common
|
||||
"""
|
||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||
"""
|
||||
class HLL2:
|
||||
class HLL2 (Simulator.BaseSimulator):
|
||||
|
||||
"""
|
||||
Initialization routine
|
||||
@ -61,66 +56,36 @@ class HLL2:
|
||||
g, \
|
||||
theta=1.8, \
|
||||
block_width=16, block_height=16):
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
#Get kernels
|
||||
self.hll2_module = context.get_kernel("HLL2_kernel.cu", block_width, block_height)
|
||||
self.hll2_kernel = self.hll2_module.get_function("HLL2Kernel")
|
||||
self.hll2_kernel.prepare("iifffffiPiPiPiPiPiPi")
|
||||
|
||||
#Create data by uploading to device
|
||||
ghost_cells_x = 2
|
||||
ghost_cells_y = 2
|
||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
h0, hu0, hv0)
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#OpenCL kernel
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.dt = np.float32(dt)
|
||||
self.g = np.float32(g)
|
||||
self.theta = np.float32(theta)
|
||||
|
||||
#Initialize time
|
||||
self.t = np.float32(0.0)
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.local_size = (block_width, block_height, 1)
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
|
||||
#Get kernels
|
||||
self.module = context.get_kernel("HLL2_kernel.cu", block_width, block_height)
|
||||
self.kernel = self.module.get_function("HLL2Kernel")
|
||||
self.kernel.prepare("iifffffiPiPiPiPiPiPi")
|
||||
|
||||
def __str__(self):
|
||||
return "Harten-Lax-van Leer (2nd order)"
|
||||
|
||||
def simulate(self, t_end):
|
||||
return super().simulateDimsplit(t_end)
|
||||
|
||||
"""
|
||||
Function which steps n timesteps
|
||||
"""
|
||||
def step(self, t_end=0.0):
|
||||
n = int(t_end / (2.0*self.dt) + 1)
|
||||
def stepEuler(self, dt):
|
||||
return self.stepDimsplitXY(dt)
|
||||
|
||||
for i in range(0, n):
|
||||
#Dimensional splitting: second order accurate for every other timestep,
|
||||
#thus run two timesteps in a go
|
||||
|
||||
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
#Along X, then Y
|
||||
self.hll2_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepDimsplitXY(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(0), \
|
||||
@ -131,11 +96,12 @@ class HLL2:
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
self.data.swap()
|
||||
self.t += dt
|
||||
|
||||
#Along Y, then X
|
||||
self.hll2_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepDimsplitYX(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(1), \
|
||||
@ -146,14 +112,5 @@ class HLL2:
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
self.data.swap()
|
||||
|
||||
self.t += local_dt
|
||||
|
||||
|
||||
return self.t, 2*n
|
||||
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
self.t += dt
|
||||
|
@ -26,12 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from SWESimulators import Common
|
||||
from SWESimulators import Simulator
|
||||
|
||||
|
||||
|
||||
@ -40,7 +35,7 @@ from SWESimulators import Common
|
||||
"""
|
||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||
"""
|
||||
class KP07:
|
||||
class KP07 (Simulator.BaseSimulator):
|
||||
|
||||
"""
|
||||
Initialization routine
|
||||
@ -53,131 +48,66 @@ class KP07:
|
||||
dy: Grid cell spacing along y-axis (20 000 m)
|
||||
dt: Size of each timestep (90 s)
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
f: Coriolis parameter (1.2e-4 s^1)
|
||||
r: Bottom friction coefficient (2.4e-3 m/s)
|
||||
wind_type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
|
||||
wind_tau0: Amplitude of wind stress (Pa)
|
||||
wind_rho: Density of sea water (1025.0 kg / m^3)
|
||||
wind_alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
|
||||
wind_xm: Maximum wind stress for bell shaped wind stress
|
||||
wind_Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
|
||||
wind_x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
|
||||
wind_y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
|
||||
wind_u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
|
||||
wind_v0: Translation speed along y for moving cyclone (-0.5*u0)
|
||||
"""
|
||||
def __init__(self, \
|
||||
context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
dx, dy, dt, \
|
||||
g, theta=1.3, \
|
||||
r=0.0, use_rk2=True,
|
||||
g, \
|
||||
theta=1.3, r=0.0, \
|
||||
block_width=16, block_height=16):
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
#Get kernels
|
||||
self.kp07_module = context.get_kernel("KP07_kernel.cu", block_width, block_height)
|
||||
self.kp07_kernel = self.kp07_module.get_function("KP07Kernel")
|
||||
self.kp07_kernel.prepare("iiffffffiPiPiPiPiPiPi")
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
|
||||
#Create data by uploading to device
|
||||
ghost_cells_x = 2
|
||||
ghost_cells_y = 2
|
||||
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#OpenCL kernel
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.dt = np.float32(dt)
|
||||
self.g = np.float32(g)
|
||||
self.theta = np.float32(theta)
|
||||
self.r = np.float32(r)
|
||||
self.use_rk2 = use_rk2
|
||||
|
||||
#Initialize time
|
||||
self.t = np.float32(0.0)
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.local_size = (block_width, block_height, 1)
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
|
||||
|
||||
#Get kernels
|
||||
self.module = context.get_kernel("KP07_kernel.cu", block_width, block_height)
|
||||
self.kernel = self.module.get_function("KP07Kernel")
|
||||
self.kernel.prepare("iiffffffiPiPiPiPiPiPi")
|
||||
|
||||
def __str__(self):
|
||||
return "Kurganov-Petrova 2007"
|
||||
|
||||
"""
|
||||
Function which steps n timesteps
|
||||
"""
|
||||
def step(self, t_end=0.0):
|
||||
n = int(t_end / self.dt + 1)
|
||||
def simulate(self, t_end):
|
||||
return super().simulateRK(t_end, 2)
|
||||
|
||||
for i in range(0, n):
|
||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
if (self.use_rk2):
|
||||
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def substepRK(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.theta, \
|
||||
self.r, \
|
||||
np.int32(0), \
|
||||
np.int32(substep), \
|
||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
self.data.swap()
|
||||
|
||||
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.g, \
|
||||
self.theta, \
|
||||
self.r, \
|
||||
np.int32(1), \
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch, \
|
||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.pitch)
|
||||
else:
|
||||
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.g, \
|
||||
self.theta, \
|
||||
self.r, \
|
||||
np.int32(0), \
|
||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
self.cl_data.swap()
|
||||
|
||||
self.t += local_dt
|
||||
|
||||
|
||||
return self.t, n
|
||||
|
||||
|
||||
def stepEuler(self, dt):
|
||||
self.substepRK(dt, 0)
|
||||
self.t += dt
|
||||
|
||||
def stepRK(self, dt, order):
|
||||
if (order != 2):
|
||||
raise NotImplementedError("Only second order implemented")
|
||||
self.substepRK(dt, 0)
|
||||
self.substepRK(dt, 1)
|
||||
self.t += dt
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
@ -26,12 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from SWESimulators import Common
|
||||
from SWESimulators import Simulator
|
||||
|
||||
|
||||
|
||||
@ -40,7 +35,7 @@ from SWESimulators import Common
|
||||
"""
|
||||
Class that solves the SW equations using the dimentionally split KP07 scheme
|
||||
"""
|
||||
class KP07_dimsplit:
|
||||
class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
|
||||
"""
|
||||
Initialization routine
|
||||
@ -62,64 +57,36 @@ class KP07_dimsplit:
|
||||
g, \
|
||||
theta=1.3, \
|
||||
block_width=16, block_height=16):
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
#Get kernels
|
||||
self.kp07_dimsplit_module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height)
|
||||
self.kp07_dimsplit_kernel = self.kp07_dimsplit_module.get_function("KP07DimsplitKernel")
|
||||
self.kp07_dimsplit_kernel.prepare("iifffffiPiPiPiPiPiPi")
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
|
||||
#Create data by uploading to device
|
||||
ghost_cells_x = 2
|
||||
ghost_cells_y = 2
|
||||
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#OpenCL kernel
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.dt = np.float32(dt)
|
||||
self.g = np.float32(g)
|
||||
self.theta = np.float32(theta)
|
||||
|
||||
#Initialize time
|
||||
self.t = np.float32(0.0)
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.local_size = (block_width, block_height, 1)
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
|
||||
#Get kernels
|
||||
self.module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height)
|
||||
self.kernel = self.module.get_function("KP07DimsplitKernel")
|
||||
self.kernel.prepare("iifffffiPiPiPiPiPiPi")
|
||||
|
||||
def __str__(self):
|
||||
return "Kurganov-Petrova 2007 dimensionally split"
|
||||
|
||||
def simulate(self, t_end):
|
||||
return super().simulateDimsplit(t_end)
|
||||
|
||||
"""
|
||||
Function which steps n timesteps
|
||||
"""
|
||||
def step(self, t_end=0.0):
|
||||
n = int(t_end / (2.0*self.dt) + 1)
|
||||
def stepEuler(self, dt):
|
||||
return self.stepDimsplitXY(dt)
|
||||
|
||||
for i in range(0, n):
|
||||
#Dimensional splitting: second order accurate for every other timestep,
|
||||
#thus run two timesteps in a go
|
||||
|
||||
#Compute timestep
|
||||
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
#Along X, then Y
|
||||
self.kp07_dimsplit_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepDimsplitXY(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(0), \
|
||||
@ -130,11 +97,12 @@ class KP07_dimsplit:
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
self.data.swap()
|
||||
self.t += dt
|
||||
|
||||
#Along Y, then X
|
||||
self.kp07_dimsplit_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepDimsplitYX(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(1), \
|
||||
@ -145,15 +113,6 @@ class KP07_dimsplit:
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
self.data.swap()
|
||||
|
||||
self.t += 2.0*local_dt
|
||||
self.t += dt
|
||||
|
||||
|
||||
return self.t, 2*n
|
||||
|
||||
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
||||
|
@ -21,13 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from SWESimulators import Common
|
||||
from SWESimulators import Simulator
|
||||
|
||||
|
||||
|
||||
@ -38,7 +32,7 @@ from SWESimulators import Common
|
||||
"""
|
||||
Class that solves the SW equations using the Lax Friedrichs scheme
|
||||
"""
|
||||
class LxF:
|
||||
class LxF (Simulator.BaseSimulator):
|
||||
|
||||
"""
|
||||
Initialization routine
|
||||
@ -59,62 +53,31 @@ class LxF:
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width=16, block_height=16):
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
|
||||
# Get kernels
|
||||
self.lxf_module = context.get_kernel("LxF_kernel.cu", block_width, block_height)
|
||||
self.lxf_kernel = self.lxf_module.get_function("LxFKernel")
|
||||
self.lxf_kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||
|
||||
#Create data by uploading to device
|
||||
ghost_cells_x = 1
|
||||
ghost_cells_y = 1
|
||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#OpenCL kernel
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.dt = np.float32(dt)
|
||||
self.g = np.float32(g)
|
||||
|
||||
#Initialize time
|
||||
self.t = np.float32(0.0)
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.local_size = (block_width, block_height, 1)
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
|
||||
|
||||
self.module = context.get_kernel("LxF_kernel.cu", block_width, block_height)
|
||||
self.kernel = self.module.get_function("LxFKernel")
|
||||
self.kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||
|
||||
def __str__(self):
|
||||
return "Lax Friedrichs"
|
||||
|
||||
"""
|
||||
Function which steps n timesteps
|
||||
"""
|
||||
def step(self, t_end=0.0):
|
||||
n = int(t_end / self.dt + 1)
|
||||
def simulate(self, t_end):
|
||||
return super().simulateEuler(t_end)
|
||||
|
||||
for i in range(0, n):
|
||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
self.lxf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepEuler(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||
@ -122,17 +85,7 @@ class LxF:
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
|
||||
self.t += local_dt
|
||||
|
||||
self.data.swap()
|
||||
|
||||
return self.t, n
|
||||
self.t += dt
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
||||
|
179
SWESimulators/Simulator.py
Normal file
179
SWESimulators/Simulator.py
Normal file
@ -0,0 +1,179 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the classical Lax-Friedrichs numerical
|
||||
scheme for the shallow water equations
|
||||
|
||||
Copyright (C) 2016 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from SWESimulators import Common
|
||||
|
||||
|
||||
class BaseSimulator:
|
||||
"""
|
||||
Initialization routine
|
||||
context: GPU context to use
|
||||
kernel_wrapper: wrapper function of GPU kernel
|
||||
h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
|
||||
hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
|
||||
hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
|
||||
nx: Number of cells along x-axis
|
||||
ny: Number of cells along y-axis
|
||||
dx: Grid cell spacing along x-axis (20 000 m)
|
||||
dy: Grid cell spacing along y-axis (20 000 m)
|
||||
dt: Size of each timestep (90 s)
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
"""
|
||||
def __init__(self, \
|
||||
context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height):
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
#Create data by uploading to device
|
||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#GPU kernel
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.dt = np.float32(dt)
|
||||
self.g = np.float32(g)
|
||||
|
||||
#Keep track of simulation time
|
||||
self.t = 0.0;
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.local_size = (block_width, block_height, 1)
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
|
||||
"""
|
||||
Function which simulates forward in time using the default simulation type
|
||||
"""
|
||||
def simulate(self, t_end):
|
||||
raise(exceptions.NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
"""
|
||||
Function which simulates t_end seconds using forward Euler
|
||||
Requires that the stepEuler functionality is implemented in the subclasses
|
||||
"""
|
||||
def simulateEuler(self, t_end):
|
||||
# Compute number of timesteps to perform
|
||||
n = int(t_end / self.dt + 1)
|
||||
|
||||
for i in range(0, n):
|
||||
# Compute timestep for "this" iteration
|
||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||
|
||||
# Stop if end reached (should not happen)
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
# Step with forward Euler
|
||||
self.stepEuler(local_dt)
|
||||
|
||||
return self.t, n
|
||||
|
||||
"""
|
||||
Function which simulates t_end seconds using Runge-Kutta 2
|
||||
Requires that the stepRK functionality is implemented in the subclasses
|
||||
"""
|
||||
def simulateRK(self, t_end, order):
|
||||
|
||||
# Compute number of timesteps to perform
|
||||
n = int(t_end / self.dt + 1)
|
||||
|
||||
for i in range(0, n):
|
||||
# Compute timestep for "this" iteration
|
||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||
|
||||
# Stop if end reached (should not happen)
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
# Perform all the Runge-Kutta substeps
|
||||
self.stepRK(local_dt, order)
|
||||
|
||||
return self.t, n
|
||||
|
||||
"""
|
||||
Function which simulates t_end seconds using second order dimensional splitting (XYYX)
|
||||
Requires that the stepDimsplitX and stepDimsplitY functionality is implemented in the subclasses
|
||||
"""
|
||||
def simulateDimsplit(self, t_end):
|
||||
|
||||
# Compute number of timesteps to perform
|
||||
n = int(t_end / (2.0*self.dt) + 1)
|
||||
|
||||
for i in range(0, n):
|
||||
# Compute timestep for "this" iteration
|
||||
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
||||
|
||||
# Stop if end reached (should not happen)
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
# Perform the dimensional split substeps
|
||||
self.stepDimsplitXY(local_dt)
|
||||
self.stepDimsplitYX(local_dt)
|
||||
|
||||
|
||||
return self.t, 2*n
|
||||
|
||||
|
||||
"""
|
||||
Function which performs one single timestep of size dt using forward euler
|
||||
"""
|
||||
def stepEuler(self, dt):
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def stepRK(self, dt, substep):
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def stepDimsplitXY(self, dt):
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def stepDimsplitYX(self, dt):
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def sim_time(self):
|
||||
return self.t
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
@ -22,12 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from SWESimulators import Common
|
||||
from SWESimulators import Simulator
|
||||
|
||||
|
||||
|
||||
@ -36,7 +31,7 @@ from SWESimulators import Common
|
||||
"""
|
||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||
"""
|
||||
class WAF:
|
||||
class WAF (Simulator.BaseSimulator):
|
||||
|
||||
"""
|
||||
Initialization routine
|
||||
@ -57,62 +52,34 @@ class WAF:
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width=16, block_height=16):
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
|
||||
#Get kernels
|
||||
self.waf_module = context.get_kernel("WAF_kernel.cu", block_width, block_height)
|
||||
self.waf_kernel = self.waf_module.get_function("WAFKernel")
|
||||
self.waf_kernel.prepare("iiffffiPiPiPiPiPiPi")
|
||||
|
||||
#Create data by uploading to device
|
||||
ghost_cells_x = 2
|
||||
ghost_cells_y = 2
|
||||
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#OpenCL kernel
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.dt = np.float32(dt)
|
||||
self.g = np.float32(g)
|
||||
|
||||
#Initialize time
|
||||
self.t = np.float32(0.0)
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.local_size = (block_width, block_height, 1)
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
|
||||
|
||||
self.module = context.get_kernel("WAF_kernel.cu", block_width, block_height)
|
||||
self.kernel = self.module.get_function("WAFKernel")
|
||||
self.kernel.prepare("iiffffiPiPiPiPiPiPi")
|
||||
|
||||
def __str__(self):
|
||||
return "Weighted average flux"
|
||||
|
||||
"""
|
||||
Function which steps n timesteps
|
||||
"""
|
||||
def step(self, t_end=0.0):
|
||||
n = int(t_end / (2.0*self.dt) + 1)
|
||||
def simulate(self, t_end):
|
||||
return super().simulateDimsplit(t_end)
|
||||
|
||||
for i in range(0, n):
|
||||
#Dimensional splitting: second order accurate for every other timestep,
|
||||
#thus run two timesteps in a go
|
||||
def stepEuler(self, dt):
|
||||
return self.stepDimsplitXY(dt)
|
||||
|
||||
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
||||
if (local_dt <= 0.0):
|
||||
break
|
||||
|
||||
#Along X, then Y
|
||||
self.waf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepDimsplitXY(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(0), \
|
||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||
@ -121,28 +88,20 @@ class WAF:
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
self.data.swap()
|
||||
self.t += dt
|
||||
|
||||
#Along Y, then X
|
||||
self.waf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
def stepDimsplitYX(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, local_dt, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(1), \
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch, \
|
||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.pitch)
|
||||
|
||||
self.t += local_dt
|
||||
|
||||
|
||||
return self.t, 2*n
|
||||
|
||||
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
||||
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||
self.data.swap()
|
||||
self.t += dt
|
||||
|
Loading…
x
Reference in New Issue
Block a user