mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
Added superclass
This commit is contained in:
parent
8863f654be
commit
4da6fd043d
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -21,13 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
from SWESimulators import Simulator
|
||||||
|
|
||||||
import pycuda.compiler as cuda_compiler
|
|
||||||
import pycuda.gpuarray
|
|
||||||
import pycuda.driver as cuda
|
|
||||||
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -42,7 +36,7 @@ from SWESimulators import Common
|
|||||||
"""
|
"""
|
||||||
Class that solves the SW equations
|
Class that solves the SW equations
|
||||||
"""
|
"""
|
||||||
class FORCE:
|
class FORCE (Simulator.BaseSimulator):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
@ -63,62 +57,31 @@ class FORCE:
|
|||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
#Create a CUDA stream
|
|
||||||
self.stream = cuda.Stream()
|
# Call super constructor
|
||||||
|
super().__init__(context, \
|
||||||
|
h0, hu0, hv0, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
dx, dy, dt, \
|
||||||
|
g, \
|
||||||
|
block_width, block_height);
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.force_module = context.get_kernel("FORCE_kernel.cu", block_width, block_height)
|
self.module = context.get_kernel("FORCE_kernel.cu", block_width, block_height)
|
||||||
self.force_kernel = self.force_module.get_function("FORCEKernel")
|
self.kernel = self.module.get_function("FORCEKernel")
|
||||||
self.force_kernel.prepare("iiffffPiPiPiPiPiPi")
|
self.kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
|
||||||
ghost_cells_x = 1
|
|
||||||
ghost_cells_y = 1
|
|
||||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
|
||||||
nx, ny, \
|
|
||||||
ghost_cells_x, ghost_cells_y, \
|
|
||||||
h0, hu0, hv0)
|
|
||||||
|
|
||||||
#Save input parameters
|
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
|
||||||
#OpenCL kernel
|
|
||||||
self.nx = np.int32(nx)
|
|
||||||
self.ny = np.int32(ny)
|
|
||||||
self.dx = np.float32(dx)
|
|
||||||
self.dy = np.float32(dy)
|
|
||||||
self.dt = np.float32(dt)
|
|
||||||
self.g = np.float32(g)
|
|
||||||
|
|
||||||
#Initialize time
|
|
||||||
self.t = np.float32(0.0)
|
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
|
||||||
self.local_size = (block_width, block_height, 1)
|
|
||||||
self.global_size = ( \
|
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "First order centered"
|
return "First order centered"
|
||||||
|
|
||||||
|
def simulate(self, t_end):
|
||||||
|
return super().simulateEuler(t_end)
|
||||||
|
|
||||||
"""
|
def stepEuler(self, dt):
|
||||||
Function which steps n timesteps
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
"""
|
|
||||||
def step(self, t_end=0.0):
|
|
||||||
n = int(t_end / self.dt + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
|
||||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
|
||||||
|
|
||||||
if (local_dt <= 0.0):
|
|
||||||
break
|
|
||||||
|
|
||||||
self.force_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
@ -126,17 +89,7 @@ class FORCE:
|
|||||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
|
||||||
self.t += local_dt
|
|
||||||
|
|
||||||
self.data.swap()
|
self.data.swap()
|
||||||
|
self.t += dt
|
||||||
return self.t, n
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download(self):
|
|
||||||
return self.data.download(self.stream)
|
|
||||||
|
|
||||||
|
@ -20,13 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
from SWESimulators import Simulator
|
||||||
|
|
||||||
import pycuda.compiler as cuda_compiler
|
|
||||||
import pycuda.gpuarray
|
|
||||||
import pycuda.driver as cuda
|
|
||||||
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -37,7 +31,7 @@ from SWESimulators import Common
|
|||||||
"""
|
"""
|
||||||
Class that solves the SW equations using the Harten-Lax -van Leer approximate Riemann solver
|
Class that solves the SW equations using the Harten-Lax -van Leer approximate Riemann solver
|
||||||
"""
|
"""
|
||||||
class HLL:
|
class HLL (Simulator.BaseSimulator):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
@ -58,62 +52,31 @@ class HLL:
|
|||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
#Create a CUDA stream
|
|
||||||
self.stream = cuda.Stream()
|
# Call super constructor
|
||||||
|
super().__init__(context, \
|
||||||
|
h0, hu0, hv0, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
dx, dy, dt, \
|
||||||
|
g, \
|
||||||
|
block_width, block_height);
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.hll_module = context.get_kernel("HLL_kernel.cu", block_width, block_height)
|
self.module = context.get_kernel("HLL_kernel.cu", block_width, block_height)
|
||||||
self.hll_kernel = self.hll_module.get_function("HLLKernel")
|
self.kernel = self.module.get_function("HLLKernel")
|
||||||
self.hll_kernel.prepare("iiffffPiPiPiPiPiPi")
|
self.kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
|
||||||
ghost_cells_x = 1
|
|
||||||
ghost_cells_y = 1
|
|
||||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
|
||||||
nx, ny, \
|
|
||||||
ghost_cells_x, ghost_cells_y, \
|
|
||||||
h0, hu0, hv0)
|
|
||||||
|
|
||||||
#Save input parameters
|
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
|
||||||
#OpenCL kernel
|
|
||||||
self.nx = np.int32(nx)
|
|
||||||
self.ny = np.int32(ny)
|
|
||||||
self.dx = np.float32(dx)
|
|
||||||
self.dy = np.float32(dy)
|
|
||||||
self.dt = np.float32(dt)
|
|
||||||
self.g = np.float32(g)
|
|
||||||
|
|
||||||
#Initialize time
|
|
||||||
self.t = np.float32(0.0)
|
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
|
||||||
self.local_size = (block_width, block_height, 1)
|
|
||||||
self.global_size = ( \
|
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Harten-Lax-van Leer"
|
return "Harten-Lax-van Leer"
|
||||||
|
|
||||||
|
def simulate(self, t_end):
|
||||||
|
return super().simulateEuler(t_end)
|
||||||
|
|
||||||
"""
|
def stepEuler(self, dt):
|
||||||
Function which steps n timesteps
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
"""
|
|
||||||
def step(self, t_end=0.0):
|
|
||||||
n = int(t_end / self.dt + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
|
||||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
|
||||||
|
|
||||||
if (local_dt <= 0.0):
|
|
||||||
break
|
|
||||||
|
|
||||||
self.hll_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
@ -121,17 +84,7 @@ class HLL:
|
|||||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
|
||||||
self.t += local_dt
|
|
||||||
|
|
||||||
self.data.swap()
|
self.data.swap()
|
||||||
|
self.t += dt
|
||||||
return self.t, n
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download(self):
|
|
||||||
return self.data.download(self.stream)
|
|
||||||
|
|
||||||
|
@ -21,12 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from SWESimulators import Simulator
|
||||||
import pycuda.compiler as cuda_compiler
|
|
||||||
import pycuda.gpuarray
|
|
||||||
import pycuda.driver as cuda
|
|
||||||
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -39,7 +34,7 @@ from SWESimulators import Common
|
|||||||
"""
|
"""
|
||||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||||
"""
|
"""
|
||||||
class HLL2:
|
class HLL2 (Simulator.BaseSimulator):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
@ -61,66 +56,36 @@ class HLL2:
|
|||||||
g, \
|
g, \
|
||||||
theta=1.8, \
|
theta=1.8, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
#Create a CUDA stream
|
|
||||||
self.stream = cuda.Stream()
|
|
||||||
|
|
||||||
#Get kernels
|
# Call super constructor
|
||||||
self.hll2_module = context.get_kernel("HLL2_kernel.cu", block_width, block_height)
|
super().__init__(context, \
|
||||||
self.hll2_kernel = self.hll2_module.get_function("HLL2Kernel")
|
h0, hu0, hv0, \
|
||||||
self.hll2_kernel.prepare("iifffffiPiPiPiPiPiPi")
|
|
||||||
|
|
||||||
#Create data by uploading to device
|
|
||||||
ghost_cells_x = 2
|
|
||||||
ghost_cells_y = 2
|
|
||||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
ghost_cells_x, ghost_cells_y, \
|
2, 2, \
|
||||||
h0, hu0, hv0)
|
dx, dy, dt, \
|
||||||
|
g, \
|
||||||
|
block_width, block_height);
|
||||||
|
|
||||||
#Save input parameters
|
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
|
||||||
#OpenCL kernel
|
|
||||||
self.nx = np.int32(nx)
|
|
||||||
self.ny = np.int32(ny)
|
|
||||||
self.dx = np.float32(dx)
|
|
||||||
self.dy = np.float32(dy)
|
|
||||||
self.dt = np.float32(dt)
|
|
||||||
self.g = np.float32(g)
|
|
||||||
self.theta = np.float32(theta)
|
self.theta = np.float32(theta)
|
||||||
|
|
||||||
#Initialize time
|
#Get kernels
|
||||||
self.t = np.float32(0.0)
|
self.module = context.get_kernel("HLL2_kernel.cu", block_width, block_height)
|
||||||
|
self.kernel = self.module.get_function("HLL2Kernel")
|
||||||
#Compute kernel launch parameters
|
self.kernel.prepare("iifffffiPiPiPiPiPiPi")
|
||||||
self.local_size = (block_width, block_height, 1)
|
|
||||||
self.global_size = ( \
|
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Harten-Lax-van Leer (2nd order)"
|
return "Harten-Lax-van Leer (2nd order)"
|
||||||
|
|
||||||
|
def simulate(self, t_end):
|
||||||
|
return super().simulateDimsplit(t_end)
|
||||||
|
|
||||||
"""
|
def stepEuler(self, dt):
|
||||||
Function which steps n timesteps
|
return self.stepDimsplitXY(dt)
|
||||||
"""
|
|
||||||
def step(self, t_end=0.0):
|
|
||||||
n = int(t_end / (2.0*self.dt) + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
def stepDimsplitXY(self, dt):
|
||||||
#Dimensional splitting: second order accurate for every other timestep,
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
#thus run two timesteps in a go
|
|
||||||
|
|
||||||
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
|
||||||
if (local_dt <= 0.0):
|
|
||||||
break
|
|
||||||
|
|
||||||
#Along X, then Y
|
|
||||||
self.hll2_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
@ -131,11 +96,12 @@ class HLL2:
|
|||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.data.swap()
|
self.data.swap()
|
||||||
|
self.t += dt
|
||||||
|
|
||||||
#Along Y, then X
|
def stepDimsplitYX(self, dt):
|
||||||
self.hll2_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
@ -146,14 +112,5 @@ class HLL2:
|
|||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.data.swap()
|
self.data.swap()
|
||||||
|
self.t += dt
|
||||||
self.t += local_dt
|
|
||||||
|
|
||||||
|
|
||||||
return self.t, 2*n
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download(self):
|
|
||||||
return self.data.download(self.stream)
|
|
||||||
|
|
@ -26,12 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from SWESimulators import Simulator
|
||||||
import pycuda.compiler as cuda_compiler
|
|
||||||
import pycuda.gpuarray
|
|
||||||
import pycuda.driver as cuda
|
|
||||||
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -40,7 +35,7 @@ from SWESimulators import Common
|
|||||||
"""
|
"""
|
||||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||||
"""
|
"""
|
||||||
class KP07:
|
class KP07 (Simulator.BaseSimulator):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
@ -53,131 +48,66 @@ class KP07:
|
|||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
dt: Size of each timestep (90 s)
|
dt: Size of each timestep (90 s)
|
||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
f: Coriolis parameter (1.2e-4 s^1)
|
|
||||||
r: Bottom friction coefficient (2.4e-3 m/s)
|
r: Bottom friction coefficient (2.4e-3 m/s)
|
||||||
wind_type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
|
|
||||||
wind_tau0: Amplitude of wind stress (Pa)
|
|
||||||
wind_rho: Density of sea water (1025.0 kg / m^3)
|
|
||||||
wind_alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
|
|
||||||
wind_xm: Maximum wind stress for bell shaped wind stress
|
|
||||||
wind_Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
|
|
||||||
wind_x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
|
|
||||||
wind_y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
|
|
||||||
wind_u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
|
|
||||||
wind_v0: Translation speed along y for moving cyclone (-0.5*u0)
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
context, \
|
context, \
|
||||||
h0, hu0, hv0, \
|
h0, hu0, hv0, \
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, theta=1.3, \
|
g, \
|
||||||
r=0.0, use_rk2=True,
|
theta=1.3, r=0.0, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
#Create a CUDA stream
|
|
||||||
self.stream = cuda.Stream()
|
|
||||||
|
|
||||||
#Get kernels
|
# Call super constructor
|
||||||
self.kp07_module = context.get_kernel("KP07_kernel.cu", block_width, block_height)
|
super().__init__(context, \
|
||||||
self.kp07_kernel = self.kp07_module.get_function("KP07Kernel")
|
h0, hu0, hv0, \
|
||||||
self.kp07_kernel.prepare("iiffffffiPiPiPiPiPiPi")
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
dx, dy, dt, \
|
||||||
|
g, \
|
||||||
|
block_width, block_height);
|
||||||
|
|
||||||
#Create data by uploading to device
|
|
||||||
ghost_cells_x = 2
|
|
||||||
ghost_cells_y = 2
|
|
||||||
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
|
||||||
|
|
||||||
#Save input parameters
|
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
|
||||||
#OpenCL kernel
|
|
||||||
self.nx = np.int32(nx)
|
|
||||||
self.ny = np.int32(ny)
|
|
||||||
self.dx = np.float32(dx)
|
|
||||||
self.dy = np.float32(dy)
|
|
||||||
self.dt = np.float32(dt)
|
|
||||||
self.g = np.float32(g)
|
|
||||||
self.theta = np.float32(theta)
|
self.theta = np.float32(theta)
|
||||||
self.r = np.float32(r)
|
self.r = np.float32(r)
|
||||||
self.use_rk2 = use_rk2
|
|
||||||
|
|
||||||
#Initialize time
|
|
||||||
self.t = np.float32(0.0)
|
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
|
||||||
self.local_size = (block_width, block_height, 1)
|
|
||||||
self.global_size = ( \
|
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
#Get kernels
|
||||||
|
self.module = context.get_kernel("KP07_kernel.cu", block_width, block_height)
|
||||||
|
self.kernel = self.module.get_function("KP07Kernel")
|
||||||
|
self.kernel.prepare("iiffffffiPiPiPiPiPiPi")
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Kurganov-Petrova 2007"
|
return "Kurganov-Petrova 2007"
|
||||||
|
|
||||||
"""
|
def simulate(self, t_end):
|
||||||
Function which steps n timesteps
|
return super().simulateRK(t_end, 2)
|
||||||
"""
|
|
||||||
def step(self, t_end=0.0):
|
|
||||||
n = int(t_end / self.dt + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
def substepRK(self, dt, substep):
|
||||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
|
|
||||||
if (local_dt <= 0.0):
|
|
||||||
break
|
|
||||||
|
|
||||||
if (self.use_rk2):
|
|
||||||
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
self.r, \
|
self.r, \
|
||||||
np.int32(0), \
|
np.int32(substep), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
self.data.swap()
|
||||||
|
|
||||||
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
def stepEuler(self, dt):
|
||||||
self.nx, self.ny, \
|
self.substepRK(dt, 0)
|
||||||
self.dx, self.dy, local_dt, \
|
self.t += dt
|
||||||
self.g, \
|
|
||||||
self.theta, \
|
|
||||||
self.r, \
|
|
||||||
np.int32(1), \
|
|
||||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch, \
|
|
||||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.pitch)
|
|
||||||
else:
|
|
||||||
self.kp07_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
|
||||||
self.nx, self.ny, \
|
|
||||||
self.dx, self.dy, local_dt, \
|
|
||||||
self.g, \
|
|
||||||
self.theta, \
|
|
||||||
self.r, \
|
|
||||||
np.int32(0), \
|
|
||||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
|
||||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
|
||||||
self.cl_data.swap()
|
|
||||||
|
|
||||||
self.t += local_dt
|
|
||||||
|
|
||||||
|
|
||||||
return self.t, n
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def stepRK(self, dt, order):
|
||||||
|
if (order != 2):
|
||||||
|
raise NotImplementedError("Only second order implemented")
|
||||||
|
self.substepRK(dt, 0)
|
||||||
|
self.substepRK(dt, 1)
|
||||||
|
self.t += dt
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.data.download(self.stream)
|
return self.data.download(self.stream)
|
||||||
|
@ -26,12 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from SWESimulators import Simulator
|
||||||
import pycuda.compiler as cuda_compiler
|
|
||||||
import pycuda.gpuarray
|
|
||||||
import pycuda.driver as cuda
|
|
||||||
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -40,7 +35,7 @@ from SWESimulators import Common
|
|||||||
"""
|
"""
|
||||||
Class that solves the SW equations using the dimentionally split KP07 scheme
|
Class that solves the SW equations using the dimentionally split KP07 scheme
|
||||||
"""
|
"""
|
||||||
class KP07_dimsplit:
|
class KP07_dimsplit (Simulator.BaseSimulator):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
@ -62,64 +57,36 @@ class KP07_dimsplit:
|
|||||||
g, \
|
g, \
|
||||||
theta=1.3, \
|
theta=1.3, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
#Create a CUDA stream
|
|
||||||
self.stream = cuda.Stream()
|
|
||||||
|
|
||||||
#Get kernels
|
# Call super constructor
|
||||||
self.kp07_dimsplit_module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height)
|
super().__init__(context, \
|
||||||
self.kp07_dimsplit_kernel = self.kp07_dimsplit_module.get_function("KP07DimsplitKernel")
|
h0, hu0, hv0, \
|
||||||
self.kp07_dimsplit_kernel.prepare("iifffffiPiPiPiPiPiPi")
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
dx, dy, dt, \
|
||||||
|
g, \
|
||||||
|
block_width, block_height);
|
||||||
|
|
||||||
#Create data by uploading to device
|
|
||||||
ghost_cells_x = 2
|
|
||||||
ghost_cells_y = 2
|
|
||||||
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
|
||||||
|
|
||||||
#Save input parameters
|
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
|
||||||
#OpenCL kernel
|
|
||||||
self.nx = np.int32(nx)
|
|
||||||
self.ny = np.int32(ny)
|
|
||||||
self.dx = np.float32(dx)
|
|
||||||
self.dy = np.float32(dy)
|
|
||||||
self.dt = np.float32(dt)
|
|
||||||
self.g = np.float32(g)
|
|
||||||
self.theta = np.float32(theta)
|
self.theta = np.float32(theta)
|
||||||
|
|
||||||
#Initialize time
|
#Get kernels
|
||||||
self.t = np.float32(0.0)
|
self.module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height)
|
||||||
|
self.kernel = self.module.get_function("KP07DimsplitKernel")
|
||||||
#Compute kernel launch parameters
|
self.kernel.prepare("iifffffiPiPiPiPiPiPi")
|
||||||
self.local_size = (block_width, block_height, 1)
|
|
||||||
self.global_size = ( \
|
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Kurganov-Petrova 2007 dimensionally split"
|
return "Kurganov-Petrova 2007 dimensionally split"
|
||||||
|
|
||||||
|
def simulate(self, t_end):
|
||||||
|
return super().simulateDimsplit(t_end)
|
||||||
|
|
||||||
"""
|
def stepEuler(self, dt):
|
||||||
Function which steps n timesteps
|
return self.stepDimsplitXY(dt)
|
||||||
"""
|
|
||||||
def step(self, t_end=0.0):
|
|
||||||
n = int(t_end / (2.0*self.dt) + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
def stepDimsplitXY(self, dt):
|
||||||
#Dimensional splitting: second order accurate for every other timestep,
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
#thus run two timesteps in a go
|
|
||||||
|
|
||||||
#Compute timestep
|
|
||||||
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
|
||||||
if (local_dt <= 0.0):
|
|
||||||
break
|
|
||||||
|
|
||||||
#Along X, then Y
|
|
||||||
self.kp07_dimsplit_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
@ -130,11 +97,12 @@ class KP07_dimsplit:
|
|||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.data.swap()
|
self.data.swap()
|
||||||
|
self.t += dt
|
||||||
|
|
||||||
#Along Y, then X
|
def stepDimsplitYX(self, dt):
|
||||||
self.kp07_dimsplit_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
@ -145,15 +113,6 @@ class KP07_dimsplit:
|
|||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
self.data.swap()
|
self.data.swap()
|
||||||
|
self.t += dt
|
||||||
self.t += 2.0*local_dt
|
|
||||||
|
|
||||||
|
|
||||||
return self.t, 2*n
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download(self):
|
|
||||||
return self.data.download(self.stream)
|
|
||||||
|
|
||||||
|
@ -21,13 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
from SWESimulators import Simulator
|
||||||
|
|
||||||
import pycuda.compiler as cuda_compiler
|
|
||||||
import pycuda.gpuarray
|
|
||||||
import pycuda.driver as cuda
|
|
||||||
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -38,7 +32,7 @@ from SWESimulators import Common
|
|||||||
"""
|
"""
|
||||||
Class that solves the SW equations using the Lax Friedrichs scheme
|
Class that solves the SW equations using the Lax Friedrichs scheme
|
||||||
"""
|
"""
|
||||||
class LxF:
|
class LxF (Simulator.BaseSimulator):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
@ -59,62 +53,31 @@ class LxF:
|
|||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
#Create a CUDA stream
|
|
||||||
self.stream = cuda.Stream()
|
# Call super constructor
|
||||||
|
super().__init__(context, \
|
||||||
|
h0, hu0, hv0, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
dx, dy, dt, \
|
||||||
|
g, \
|
||||||
|
block_width, block_height);
|
||||||
|
|
||||||
# Get kernels
|
# Get kernels
|
||||||
self.lxf_module = context.get_kernel("LxF_kernel.cu", block_width, block_height)
|
self.module = context.get_kernel("LxF_kernel.cu", block_width, block_height)
|
||||||
self.lxf_kernel = self.lxf_module.get_function("LxFKernel")
|
self.kernel = self.module.get_function("LxFKernel")
|
||||||
self.lxf_kernel.prepare("iiffffPiPiPiPiPiPi")
|
self.kernel.prepare("iiffffPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
|
||||||
ghost_cells_x = 1
|
|
||||||
ghost_cells_y = 1
|
|
||||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
|
||||||
nx, ny, \
|
|
||||||
ghost_cells_x, ghost_cells_y, \
|
|
||||||
h0, hu0, hv0)
|
|
||||||
|
|
||||||
#Save input parameters
|
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
|
||||||
#OpenCL kernel
|
|
||||||
self.nx = np.int32(nx)
|
|
||||||
self.ny = np.int32(ny)
|
|
||||||
self.dx = np.float32(dx)
|
|
||||||
self.dy = np.float32(dy)
|
|
||||||
self.dt = np.float32(dt)
|
|
||||||
self.g = np.float32(g)
|
|
||||||
|
|
||||||
#Initialize time
|
|
||||||
self.t = np.float32(0.0)
|
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
|
||||||
self.local_size = (block_width, block_height, 1)
|
|
||||||
self.global_size = ( \
|
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Lax Friedrichs"
|
return "Lax Friedrichs"
|
||||||
|
|
||||||
"""
|
def simulate(self, t_end):
|
||||||
Function which steps n timesteps
|
return super().simulateEuler(t_end)
|
||||||
"""
|
|
||||||
def step(self, t_end=0.0):
|
|
||||||
n = int(t_end / self.dt + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
def stepEuler(self, dt):
|
||||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
|
|
||||||
if (local_dt <= 0.0):
|
|
||||||
break
|
|
||||||
|
|
||||||
self.lxf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
@ -122,17 +85,7 @@ class LxF:
|
|||||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
|
||||||
self.t += local_dt
|
|
||||||
|
|
||||||
self.data.swap()
|
self.data.swap()
|
||||||
|
self.t += dt
|
||||||
return self.t, n
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download(self):
|
|
||||||
return self.data.download(self.stream)
|
|
||||||
|
|
||||||
|
179
SWESimulators/Simulator.py
Normal file
179
SWESimulators/Simulator.py
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
This python module implements the classical Lax-Friedrichs numerical
|
||||||
|
scheme for the shallow water equations
|
||||||
|
|
||||||
|
Copyright (C) 2016 SINTEF ICT
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
"""
|
||||||
|
|
||||||
|
#Import packages we need
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
import pycuda.compiler as cuda_compiler
|
||||||
|
import pycuda.gpuarray
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
|
from SWESimulators import Common
|
||||||
|
|
||||||
|
|
||||||
|
class BaseSimulator:
|
||||||
|
"""
|
||||||
|
Initialization routine
|
||||||
|
context: GPU context to use
|
||||||
|
kernel_wrapper: wrapper function of GPU kernel
|
||||||
|
h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
|
||||||
|
hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
|
||||||
|
hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
|
||||||
|
nx: Number of cells along x-axis
|
||||||
|
ny: Number of cells along y-axis
|
||||||
|
dx: Grid cell spacing along x-axis (20 000 m)
|
||||||
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
|
dt: Size of each timestep (90 s)
|
||||||
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
|
"""
|
||||||
|
def __init__(self, \
|
||||||
|
context, \
|
||||||
|
h0, hu0, hv0, \
|
||||||
|
nx, ny, \
|
||||||
|
ghost_cells_x, ghost_cells_y, \
|
||||||
|
dx, dy, dt, \
|
||||||
|
g, \
|
||||||
|
block_width, block_height):
|
||||||
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
|
|
||||||
|
#Create data by uploading to device
|
||||||
|
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
ghost_cells_x, ghost_cells_y, \
|
||||||
|
h0, hu0, hv0)
|
||||||
|
|
||||||
|
#Save input parameters
|
||||||
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
|
#GPU kernel
|
||||||
|
self.nx = np.int32(nx)
|
||||||
|
self.ny = np.int32(ny)
|
||||||
|
self.dx = np.float32(dx)
|
||||||
|
self.dy = np.float32(dy)
|
||||||
|
self.dt = np.float32(dt)
|
||||||
|
self.g = np.float32(g)
|
||||||
|
|
||||||
|
#Keep track of simulation time
|
||||||
|
self.t = 0.0;
|
||||||
|
|
||||||
|
#Compute kernel launch parameters
|
||||||
|
self.local_size = (block_width, block_height, 1)
|
||||||
|
self.global_size = ( \
|
||||||
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
|
)
|
||||||
|
|
||||||
|
"""
|
||||||
|
Function which simulates forward in time using the default simulation type
|
||||||
|
"""
|
||||||
|
def simulate(self, t_end):
|
||||||
|
raise(exceptions.NotImplementedError("Needs to be implemented in subclass"))
|
||||||
|
|
||||||
|
"""
|
||||||
|
Function which simulates t_end seconds using forward Euler
|
||||||
|
Requires that the stepEuler functionality is implemented in the subclasses
|
||||||
|
"""
|
||||||
|
def simulateEuler(self, t_end):
|
||||||
|
# Compute number of timesteps to perform
|
||||||
|
n = int(t_end / self.dt + 1)
|
||||||
|
|
||||||
|
for i in range(0, n):
|
||||||
|
# Compute timestep for "this" iteration
|
||||||
|
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||||
|
|
||||||
|
# Stop if end reached (should not happen)
|
||||||
|
if (local_dt <= 0.0):
|
||||||
|
break
|
||||||
|
|
||||||
|
# Step with forward Euler
|
||||||
|
self.stepEuler(local_dt)
|
||||||
|
|
||||||
|
return self.t, n
|
||||||
|
|
||||||
|
"""
|
||||||
|
Function which simulates t_end seconds using Runge-Kutta 2
|
||||||
|
Requires that the stepRK functionality is implemented in the subclasses
|
||||||
|
"""
|
||||||
|
def simulateRK(self, t_end, order):
|
||||||
|
|
||||||
|
# Compute number of timesteps to perform
|
||||||
|
n = int(t_end / self.dt + 1)
|
||||||
|
|
||||||
|
for i in range(0, n):
|
||||||
|
# Compute timestep for "this" iteration
|
||||||
|
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||||
|
|
||||||
|
# Stop if end reached (should not happen)
|
||||||
|
if (local_dt <= 0.0):
|
||||||
|
break
|
||||||
|
|
||||||
|
# Perform all the Runge-Kutta substeps
|
||||||
|
self.stepRK(local_dt, order)
|
||||||
|
|
||||||
|
return self.t, n
|
||||||
|
|
||||||
|
"""
|
||||||
|
Function which simulates t_end seconds using second order dimensional splitting (XYYX)
|
||||||
|
Requires that the stepDimsplitX and stepDimsplitY functionality is implemented in the subclasses
|
||||||
|
"""
|
||||||
|
def simulateDimsplit(self, t_end):
|
||||||
|
|
||||||
|
# Compute number of timesteps to perform
|
||||||
|
n = int(t_end / (2.0*self.dt) + 1)
|
||||||
|
|
||||||
|
for i in range(0, n):
|
||||||
|
# Compute timestep for "this" iteration
|
||||||
|
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
||||||
|
|
||||||
|
# Stop if end reached (should not happen)
|
||||||
|
if (local_dt <= 0.0):
|
||||||
|
break
|
||||||
|
|
||||||
|
# Perform the dimensional split substeps
|
||||||
|
self.stepDimsplitXY(local_dt)
|
||||||
|
self.stepDimsplitYX(local_dt)
|
||||||
|
|
||||||
|
|
||||||
|
return self.t, 2*n
|
||||||
|
|
||||||
|
|
||||||
|
"""
|
||||||
|
Function which performs one single timestep of size dt using forward euler
|
||||||
|
"""
|
||||||
|
def stepEuler(self, dt):
|
||||||
|
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||||
|
|
||||||
|
def stepRK(self, dt, substep):
|
||||||
|
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||||
|
|
||||||
|
def stepDimsplitXY(self, dt):
|
||||||
|
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||||
|
|
||||||
|
def stepDimsplitYX(self, dt):
|
||||||
|
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||||
|
|
||||||
|
def sim_time(self):
|
||||||
|
return self.t
|
||||||
|
|
||||||
|
def download(self):
|
||||||
|
return self.data.download(self.stream)
|
||||||
|
|
@ -22,12 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from SWESimulators import Simulator
|
||||||
import pycuda.compiler as cuda_compiler
|
|
||||||
import pycuda.gpuarray
|
|
||||||
import pycuda.driver as cuda
|
|
||||||
|
|
||||||
from SWESimulators import Common
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -36,7 +31,7 @@ from SWESimulators import Common
|
|||||||
"""
|
"""
|
||||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||||
"""
|
"""
|
||||||
class WAF:
|
class WAF (Simulator.BaseSimulator):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
@ -57,62 +52,34 @@ class WAF:
|
|||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16):
|
||||||
#Create a CUDA stream
|
|
||||||
self.stream = cuda.Stream()
|
# Call super constructor
|
||||||
|
super().__init__(context, \
|
||||||
|
h0, hu0, hv0, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
dx, dy, dt, \
|
||||||
|
g, \
|
||||||
|
block_width, block_height);
|
||||||
|
|
||||||
#Get kernels
|
#Get kernels
|
||||||
self.waf_module = context.get_kernel("WAF_kernel.cu", block_width, block_height)
|
self.module = context.get_kernel("WAF_kernel.cu", block_width, block_height)
|
||||||
self.waf_kernel = self.waf_module.get_function("WAFKernel")
|
self.kernel = self.module.get_function("WAFKernel")
|
||||||
self.waf_kernel.prepare("iiffffiPiPiPiPiPiPi")
|
self.kernel.prepare("iiffffiPiPiPiPiPiPi")
|
||||||
|
|
||||||
#Create data by uploading to device
|
|
||||||
ghost_cells_x = 2
|
|
||||||
ghost_cells_y = 2
|
|
||||||
self.data = Common.SWEDataArakawaA(self.stream, nx, ny, ghost_cells_x, ghost_cells_y, h0, hu0, hv0)
|
|
||||||
|
|
||||||
#Save input parameters
|
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
|
||||||
#OpenCL kernel
|
|
||||||
self.nx = np.int32(nx)
|
|
||||||
self.ny = np.int32(ny)
|
|
||||||
self.dx = np.float32(dx)
|
|
||||||
self.dy = np.float32(dy)
|
|
||||||
self.dt = np.float32(dt)
|
|
||||||
self.g = np.float32(g)
|
|
||||||
|
|
||||||
#Initialize time
|
|
||||||
self.t = np.float32(0.0)
|
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
|
||||||
self.local_size = (block_width, block_height, 1)
|
|
||||||
self.global_size = ( \
|
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Weighted average flux"
|
return "Weighted average flux"
|
||||||
|
|
||||||
"""
|
def simulate(self, t_end):
|
||||||
Function which steps n timesteps
|
return super().simulateDimsplit(t_end)
|
||||||
"""
|
|
||||||
def step(self, t_end=0.0):
|
|
||||||
n = int(t_end / (2.0*self.dt) + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
def stepEuler(self, dt):
|
||||||
#Dimensional splitting: second order accurate for every other timestep,
|
return self.stepDimsplitXY(dt)
|
||||||
#thus run two timesteps in a go
|
|
||||||
|
|
||||||
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
def stepDimsplitXY(self, dt):
|
||||||
if (local_dt <= 0.0):
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
break
|
|
||||||
|
|
||||||
#Along X, then Y
|
|
||||||
self.waf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
@ -121,28 +88,20 @@ class WAF:
|
|||||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
self.data.swap()
|
||||||
|
self.t += dt
|
||||||
|
|
||||||
#Along Y, then X
|
def stepDimsplitYX(self, dt):
|
||||||
self.waf_kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, local_dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.pitch, \
|
|
||||||
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
self.data.h0.data.gpudata, self.data.h0.pitch, \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
self.data.hu0.data.gpudata, self.data.hu0.pitch, \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.pitch)
|
self.data.hv0.data.gpudata, self.data.hv0.pitch, \
|
||||||
|
self.data.h1.data.gpudata, self.data.h1.pitch, \
|
||||||
self.t += local_dt
|
self.data.hu1.data.gpudata, self.data.hu1.pitch, \
|
||||||
|
self.data.hv1.data.gpudata, self.data.hv1.pitch)
|
||||||
|
self.data.swap()
|
||||||
return self.t, 2*n
|
self.t += dt
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def download(self):
|
|
||||||
return self.data.download(self.stream)
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user