mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-11-29 17:28:03 +01:00
Refactoring
This commit is contained in:
@@ -170,8 +170,9 @@ class CudaContext(object):
|
|||||||
"""
|
"""
|
||||||
def get_prepared_kernel(self, kernel_filename, kernel_function_name, \
|
def get_prepared_kernel(self, kernel_filename, kernel_function_name, \
|
||||||
prepared_call_args, \
|
prepared_call_args, \
|
||||||
include_dirs=[], no_extern_c=True,
|
include_dirs=[], \
|
||||||
**kwargs):
|
defines={}, \
|
||||||
|
compile_args={'no_extern_c', True}, jit_compile_args={}):
|
||||||
"""
|
"""
|
||||||
Helper function to print compilation output
|
Helper function to print compilation output
|
||||||
"""
|
"""
|
||||||
@@ -183,19 +184,20 @@ class CudaContext(object):
|
|||||||
self.logger.debug("Error: %s", error_str)
|
self.logger.debug("Error: %s", error_str)
|
||||||
|
|
||||||
kernel_filename = os.path.normpath(kernel_filename)
|
kernel_filename = os.path.normpath(kernel_filename)
|
||||||
|
kernel_path = os.path.abspath(os.path.join(self.module_path, kernel_filename))
|
||||||
#self.logger.debug("Getting %s", kernel_filename)
|
#self.logger.debug("Getting %s", kernel_filename)
|
||||||
|
|
||||||
# Create a hash of the kernel (and its includes)
|
# Create a hash of the kernel (and its includes)
|
||||||
kwargs_hasher = hashlib.md5()
|
options_hasher = hashlib.md5()
|
||||||
kwargs_hasher.update(str(kwargs).encode('utf-8'));
|
options_hasher.update(str(defines).encode('utf-8') + str(compile_args).encode('utf-8'));
|
||||||
kwargs_hash = kwargs_hasher.hexdigest()
|
options_hash = options_hasher.hexdigest()
|
||||||
kwargs_hasher = None
|
options_hasher = None
|
||||||
root, ext = os.path.splitext(kernel_filename)
|
root, ext = os.path.splitext(kernel_filename)
|
||||||
kernel_hash = root \
|
kernel_hash = root \
|
||||||
+ "_" + CudaContext.hash_kernel( \
|
+ "_" + CudaContext.hash_kernel( \
|
||||||
os.path.join(self.module_path, kernel_filename), \
|
kernel_path, \
|
||||||
include_dirs=[self.module_path] + include_dirs) \
|
include_dirs=[self.module_path] + include_dirs) \
|
||||||
+ "_" + kwargs_hash \
|
+ "_" + options_hash \
|
||||||
+ ext
|
+ ext
|
||||||
cached_kernel_filename = os.path.join(self.cache_path, kernel_hash)
|
cached_kernel_filename = os.path.join(self.cache_path, kernel_hash)
|
||||||
|
|
||||||
@@ -210,7 +212,7 @@ class CudaContext(object):
|
|||||||
|
|
||||||
with io.open(cached_kernel_filename, "rb") as file:
|
with io.open(cached_kernel_filename, "rb") as file:
|
||||||
file_str = file.read()
|
file_str = file.read()
|
||||||
module = cuda.module_from_buffer(file_str, message_handler=cuda_compile_message_handler)
|
module = cuda.module_from_buffer(file_str, message_handler=cuda_compile_message_handler, **jit_compile_args)
|
||||||
|
|
||||||
kernel = module.get_function(kernel_function_name)
|
kernel = module.get_function(kernel_function_name)
|
||||||
kernel.prepare(prepared_call_args)
|
kernel.prepare(prepared_call_args)
|
||||||
@@ -223,7 +225,7 @@ class CudaContext(object):
|
|||||||
|
|
||||||
#Create kernel string
|
#Create kernel string
|
||||||
kernel_string = ""
|
kernel_string = ""
|
||||||
for key, value in kwargs.items():
|
for key, value in defines.items():
|
||||||
kernel_string += "#define {:s} {:s}\n".format(str(key), str(value))
|
kernel_string += "#define {:s} {:s}\n".format(str(key), str(value))
|
||||||
kernel_string += '#include "{:s}"'.format(os.path.join(self.module_path, kernel_filename))
|
kernel_string += '#include "{:s}"'.format(os.path.join(self.module_path, kernel_filename))
|
||||||
if (self.use_cache):
|
if (self.use_cache):
|
||||||
@@ -235,8 +237,11 @@ class CudaContext(object):
|
|||||||
|
|
||||||
|
|
||||||
with Common.Timer("compiler") as timer:
|
with Common.Timer("compiler") as timer:
|
||||||
cubin = cuda_compiler.compile(kernel_string, include_dirs=include_dirs, no_extern_c=no_extern_c, cache_dir=False)
|
import warnings
|
||||||
module = cuda.module_from_buffer(cubin, message_handler=cuda_compile_message_handler)
|
with warnings.catch_warnings():
|
||||||
|
warnings.filterwarnings("ignore", message="The CUDA compiler succeeded, but said the following:\nkernel.cu", category=UserWarning)
|
||||||
|
cubin = cuda_compiler.compile(kernel_string, include_dirs=include_dirs, cache_dir=False, **compile_args)
|
||||||
|
module = cuda.module_from_buffer(cubin, message_handler=cuda_compile_message_handler, **jit_compile_args)
|
||||||
if (self.use_cache):
|
if (self.use_cache):
|
||||||
with io.open(cached_kernel_filename, "wb") as file:
|
with io.open(cached_kernel_filename, "wb") as file:
|
||||||
file.write(cubin)
|
file.write(cubin)
|
||||||
|
|||||||
@@ -68,8 +68,15 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
#Get kernels
|
#Get kernels
|
||||||
self.kernel = context.get_prepared_kernel("cuda/SWE_FORCE.cu", "FORCEKernel", \
|
self.kernel = context.get_prepared_kernel("cuda/SWE_FORCE.cu", "FORCEKernel", \
|
||||||
"iiffffPiPiPiPiPiPi", \
|
"iiffffPiPiPiPiPiPi", \
|
||||||
BLOCK_WIDTH=self.local_size[0], \
|
defines={
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
'BLOCK_WIDTH': self.block_size[0],
|
||||||
|
'BLOCK_HEIGHT': self.block_size[1]
|
||||||
|
}, \
|
||||||
|
compile_args={
|
||||||
|
'no_extern_c': True,
|
||||||
|
'options': ["--use_fast_math"],
|
||||||
|
}, \
|
||||||
|
jit_compile_args={})
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
@@ -85,7 +92,7 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
return super().simulateEuler(t_end)
|
return super().simulateEuler(t_end)
|
||||||
|
|
||||||
def stepEuler(self, dt):
|
def stepEuler(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
|
|||||||
@@ -63,8 +63,15 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
#Get kernels
|
#Get kernels
|
||||||
self.kernel = context.get_prepared_kernel("cuda/SWE_HLL.cu", "HLLKernel", \
|
self.kernel = context.get_prepared_kernel("cuda/SWE_HLL.cu", "HLLKernel", \
|
||||||
"iiffffPiPiPiPiPiPi", \
|
"iiffffPiPiPiPiPiPi", \
|
||||||
BLOCK_WIDTH=self.local_size[0], \
|
defines={
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
'BLOCK_WIDTH': self.block_size[0],
|
||||||
|
'BLOCK_HEIGHT': self.block_size[1]
|
||||||
|
}, \
|
||||||
|
compile_args={
|
||||||
|
'no_extern_c': True,
|
||||||
|
'options': ["--use_fast_math"],
|
||||||
|
}, \
|
||||||
|
jit_compile_args={})
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
@@ -80,7 +87,7 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
return super().simulateEuler(t_end)
|
return super().simulateEuler(t_end)
|
||||||
|
|
||||||
def stepEuler(self, dt):
|
def stepEuler(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
|
|||||||
@@ -69,8 +69,15 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
#Get kernels
|
#Get kernels
|
||||||
self.kernel = context.get_prepared_kernel("cuda/SWE_HLL2.cu", "HLL2Kernel", \
|
self.kernel = context.get_prepared_kernel("cuda/SWE_HLL2.cu", "HLL2Kernel", \
|
||||||
"iifffffiPiPiPiPiPiPi", \
|
"iifffffiPiPiPiPiPiPi", \
|
||||||
BLOCK_WIDTH=self.local_size[0], \
|
defines={
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
'BLOCK_WIDTH': self.block_size[0],
|
||||||
|
'BLOCK_HEIGHT': self.block_size[1]
|
||||||
|
}, \
|
||||||
|
compile_args={
|
||||||
|
'no_extern_c': True,
|
||||||
|
'options': ["--use_fast_math"],
|
||||||
|
}, \
|
||||||
|
jit_compile_args={})
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
@@ -89,7 +96,7 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
return self.stepDimsplitXY(dt)
|
return self.stepDimsplitXY(dt)
|
||||||
|
|
||||||
def stepDimsplitXY(self, dt):
|
def stepDimsplitXY(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
@@ -105,7 +112,7 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
|
|||||||
@@ -70,8 +70,15 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
#Get kernels
|
#Get kernels
|
||||||
self.kernel = context.get_prepared_kernel("cuda/SWE_KP07.cu", "KP07Kernel", \
|
self.kernel = context.get_prepared_kernel("cuda/SWE_KP07.cu", "KP07Kernel", \
|
||||||
"iifffffiPiPiPiPiPiPi", \
|
"iifffffiPiPiPiPiPiPi", \
|
||||||
BLOCK_WIDTH=self.local_size[0], \
|
defines={
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
'BLOCK_WIDTH': self.block_size[0],
|
||||||
|
'BLOCK_HEIGHT': self.block_size[1]
|
||||||
|
}, \
|
||||||
|
compile_args={
|
||||||
|
'no_extern_c': True,
|
||||||
|
'options': ["--use_fast_math"],
|
||||||
|
}, \
|
||||||
|
jit_compile_args={})
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
@@ -87,7 +94,7 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
return super().simulateRK(t_end, 2)
|
return super().simulateRK(t_end, 2)
|
||||||
|
|
||||||
def substepRK(self, dt, substep):
|
def substepRK(self, dt, substep):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
|
|||||||
@@ -70,8 +70,15 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
#Get kernels
|
#Get kernels
|
||||||
self.kernel = context.get_prepared_kernel("cuda/SWE_KP07_dimsplit.cu", "KP07DimsplitKernel", \
|
self.kernel = context.get_prepared_kernel("cuda/SWE_KP07_dimsplit.cu", "KP07DimsplitKernel", \
|
||||||
"iifffffiPiPiPiPiPiPi", \
|
"iifffffiPiPiPiPiPiPi", \
|
||||||
BLOCK_WIDTH=self.local_size[0], \
|
defines={
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
'BLOCK_WIDTH': self.block_size[0],
|
||||||
|
'BLOCK_HEIGHT': self.block_size[1]
|
||||||
|
}, \
|
||||||
|
compile_args={
|
||||||
|
'no_extern_c': True,
|
||||||
|
'options': ["--use_fast_math"],
|
||||||
|
}, \
|
||||||
|
jit_compile_args={})
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
@@ -90,7 +97,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
return self.stepDimsplitXY(dt)
|
return self.stepDimsplitXY(dt)
|
||||||
|
|
||||||
def stepDimsplitXY(self, dt):
|
def stepDimsplitXY(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
@@ -106,7 +113,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
|
|||||||
@@ -64,8 +64,15 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
# Get kernels
|
# Get kernels
|
||||||
self.kernel = context.get_prepared_kernel("cuda/SWE_LxF.cu", "LxFKernel", \
|
self.kernel = context.get_prepared_kernel("cuda/SWE_LxF.cu", "LxFKernel", \
|
||||||
"iiffffPiPiPiPiPiPi", \
|
"iiffffPiPiPiPiPiPi", \
|
||||||
BLOCK_WIDTH=self.local_size[0], \
|
defines={
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
'BLOCK_WIDTH': self.block_size[0],
|
||||||
|
'BLOCK_HEIGHT': self.block_size[1]
|
||||||
|
}, \
|
||||||
|
compile_args={
|
||||||
|
'no_extern_c': True,
|
||||||
|
'options': ["--use_fast_math"],
|
||||||
|
}, \
|
||||||
|
jit_compile_args={})
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
@@ -81,7 +88,7 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
return super().simulateEuler(t_end)
|
return super().simulateEuler(t_end)
|
||||||
|
|
||||||
def stepEuler(self, dt):
|
def stepEuler(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
|
|||||||
@@ -55,20 +55,10 @@ class BaseSimulator:
|
|||||||
#Get logger
|
#Get logger
|
||||||
self.logger = logging.getLogger(__name__ + "." + self.__class__.__name__)
|
self.logger = logging.getLogger(__name__ + "." + self.__class__.__name__)
|
||||||
|
|
||||||
self.context = context
|
|
||||||
|
|
||||||
if (self.context.autotuner):
|
|
||||||
peak_configuration = self.context.autotuner.get_peak_performance(self.__class__)
|
|
||||||
block_width = int(peak_configuration["block_width"])
|
|
||||||
block_height = int(peak_configuration["block_height"])
|
|
||||||
self.logger.debug("Used autotuning to get block size [%d x %d]", block_width, block_height)
|
|
||||||
|
|
||||||
#Create a CUDA stream
|
|
||||||
self.stream = cuda.Stream()
|
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
#GPU kernel
|
#GPU kernel
|
||||||
|
self.context = context
|
||||||
self.nx = np.int32(nx)
|
self.nx = np.int32(nx)
|
||||||
self.ny = np.int32(ny)
|
self.ny = np.int32(ny)
|
||||||
self.dx = np.float32(dx)
|
self.dx = np.float32(dx)
|
||||||
@@ -76,16 +66,26 @@ class BaseSimulator:
|
|||||||
self.dt = np.float32(dt)
|
self.dt = np.float32(dt)
|
||||||
self.g = np.float32(g)
|
self.g = np.float32(g)
|
||||||
|
|
||||||
#Keep track of simulation time
|
#Handle autotuning block size
|
||||||
self.t = 0.0;
|
if (self.context.autotuner):
|
||||||
|
peak_configuration = self.context.autotuner.get_peak_performance(self.__class__)
|
||||||
|
block_width = int(peak_configuration["block_width"])
|
||||||
|
block_height = int(peak_configuration["block_height"])
|
||||||
|
self.logger.debug("Used autotuning to get block size [%d x %d]", block_width, block_height)
|
||||||
|
|
||||||
#Compute kernel launch parameters
|
#Compute kernel launch parameters
|
||||||
self.local_size = (block_width, block_height, 1)
|
self.block_size = (block_width, block_height, 1)
|
||||||
self.global_size = ( \
|
self.grid_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
int(np.ceil(self.nx / float(self.block_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
int(np.ceil(self.ny / float(self.block_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
#Create a CUDA stream
|
||||||
|
self.stream = cuda.Stream()
|
||||||
|
|
||||||
|
#Keep track of simulation time
|
||||||
|
self.t = 0.0;
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "{:s} [{:d}x{:d}]".format(self.__class__.__name__, self.nx, self.ny)
|
return "{:s} [{:d}x{:d}]".format(self.__class__.__name__, self.nx, self.ny)
|
||||||
|
|
||||||
@@ -115,7 +115,7 @@ class BaseSimulator:
|
|||||||
# Step with forward Euler
|
# Step with forward Euler
|
||||||
self.stepEuler(local_dt)
|
self.stepEuler(local_dt)
|
||||||
|
|
||||||
self.logger.info("%s simulated %f seconds to %f with %d steps in %f seconds (Euler)", self, t_end, self.t, n, t.secs)
|
self.logger.info("%s simulated %f seconds to %f with %d steps (Euler)", self, t_end, self.t, n)
|
||||||
return self.t, n
|
return self.t, n
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -123,22 +123,21 @@ class BaseSimulator:
|
|||||||
Requires that the stepRK functionality is implemented in the subclasses
|
Requires that the stepRK functionality is implemented in the subclasses
|
||||||
"""
|
"""
|
||||||
def simulateRK(self, t_end, order):
|
def simulateRK(self, t_end, order):
|
||||||
with Common.Timer(self.__class__.__name__ + ".simulateRK") as t:
|
# Compute number of timesteps to perform
|
||||||
# Compute number of timesteps to perform
|
n = int(t_end / self.dt + 1)
|
||||||
n = int(t_end / self.dt + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
for i in range(0, n):
|
||||||
# Compute timestep for "this" iteration
|
# Compute timestep for "this" iteration
|
||||||
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
local_dt = np.float32(min(self.dt, t_end-i*self.dt))
|
||||||
|
|
||||||
# Stop if end reached (should not happen)
|
# Stop if end reached (should not happen)
|
||||||
if (local_dt <= 0.0):
|
if (local_dt <= 0.0):
|
||||||
break
|
break
|
||||||
|
|
||||||
# Perform all the Runge-Kutta substeps
|
# Perform all the Runge-Kutta substeps
|
||||||
self.stepRK(local_dt, order)
|
self.stepRK(local_dt, order)
|
||||||
|
|
||||||
self.logger.info("%s simulated %f seconds to %f with %d steps in %f seconds (RK2)", self, t_end, self.t, n, t.secs)
|
self.logger.info("%s simulated %f seconds to %f with %d steps (RK2)", self, t_end, self.t, n)
|
||||||
return self.t, n
|
return self.t, n
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -146,23 +145,22 @@ class BaseSimulator:
|
|||||||
Requires that the stepDimsplitX and stepDimsplitY functionality is implemented in the subclasses
|
Requires that the stepDimsplitX and stepDimsplitY functionality is implemented in the subclasses
|
||||||
"""
|
"""
|
||||||
def simulateDimsplit(self, t_end):
|
def simulateDimsplit(self, t_end):
|
||||||
with Common.Timer(self.__class__.__name__ + ".simulateDimsplit") as t:
|
# Compute number of timesteps to perform
|
||||||
# Compute number of timesteps to perform
|
n = int(t_end / (2.0*self.dt) + 1)
|
||||||
n = int(t_end / (2.0*self.dt) + 1)
|
|
||||||
|
|
||||||
for i in range(0, n):
|
for i in range(0, n):
|
||||||
# Compute timestep for "this" iteration
|
# Compute timestep for "this" iteration
|
||||||
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
local_dt = np.float32(0.5*min(2*self.dt, t_end-2*i*self.dt))
|
||||||
|
|
||||||
# Stop if end reached (should not happen)
|
# Stop if end reached (should not happen)
|
||||||
if (local_dt <= 0.0):
|
if (local_dt <= 0.0):
|
||||||
break
|
break
|
||||||
|
|
||||||
# Perform the dimensional split substeps
|
# Perform the dimensional split substeps
|
||||||
self.stepDimsplitXY(local_dt)
|
self.stepDimsplitXY(local_dt)
|
||||||
self.stepDimsplitYX(local_dt)
|
self.stepDimsplitYX(local_dt)
|
||||||
|
|
||||||
self.logger.info("%s simulated %f seconds to %f with %d steps in %f seconds (dimsplit)", self, t_end, self.t, 2*n, t.secs)
|
self.logger.info("%s simulated %f seconds to %f with %d steps (dimsplit)", self, t_end, self.t, 2*n)
|
||||||
return self.t, 2*n
|
return self.t, 2*n
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -63,8 +63,15 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
#Get kernels
|
#Get kernels
|
||||||
self.kernel = context.get_prepared_kernel("cuda/SWE_WAF.cu", "WAFKernel", \
|
self.kernel = context.get_prepared_kernel("cuda/SWE_WAF.cu", "WAFKernel", \
|
||||||
"iiffffiPiPiPiPiPiPi", \
|
"iiffffiPiPiPiPiPiPi", \
|
||||||
BLOCK_WIDTH=self.local_size[0], \
|
defines={
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
'BLOCK_WIDTH': self.block_size[0],
|
||||||
|
'BLOCK_HEIGHT': self.block_size[1]
|
||||||
|
}, \
|
||||||
|
compile_args={
|
||||||
|
'no_extern_c': True,
|
||||||
|
'options': ["--use_fast_math"],
|
||||||
|
}, \
|
||||||
|
jit_compile_args={})
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
@@ -83,7 +90,7 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
return self.stepDimsplitXY(dt)
|
return self.stepDimsplitXY(dt)
|
||||||
|
|
||||||
def stepDimsplitXY(self, dt):
|
def stepDimsplitXY(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
@@ -98,7 +105,7 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
|
|||||||
Reference in New Issue
Block a user