Mirror of https://github.com/smyalygames/FiniteVolumeGPU.git (synced 2025-05-18 06:24:13 +02:00)
Working prototype of autotuning

This commit is contained in:
  parent f60ceaa316
  commit 803ce8ab70

Autotuning.ipynb (387): file diff suppressed because one or more lines are too long
GPUSimulators/Autotuner.py (277, new file)
@@ -0,0 +1,277 @@
# -*- coding: utf-8 -*-

"""
This python module implements the different helper functions and classes

Copyright (C) 2018 SINTEF ICT

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import os
import gc
import numpy as np
import logging
from socket import gethostname

import pycuda.driver as cuda

from GPUSimulators import Common, Simulator, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF


class Autotuner:
    def __init__(self,
                 nx=2048, ny=2048,
                 block_widths=range(8, 32, 2),
                 block_heights=range(8, 32, 2)):
        logger = logging.getLogger(__name__)
        self.filename = "autotuning_data_" + gethostname() + ".npz"
        self.nx = nx
        self.ny = ny
        self.block_widths = block_widths
        self.block_heights = block_heights
        self.performance = {}


    def benchmark(self, simulator, force=False):
        logger = logging.getLogger(__name__)

        #Run through simulators and benchmark
        key = str(simulator.__name__)
        logger.info("Benchmarking %s to %s", key, self.filename)

        #If this simulator has been benchmarked already, skip it
        if (force==False and os.path.isfile(self.filename)):
            with np.load(self.filename) as data:
                if key in data["simulators"]:
                    logger.info("%s already benchmarked - skipping", key)
                    return

        # Set arguments to send to the simulators during construction
        context = Common.CudaContext(autotuning=False)
        g = 9.81
        h0, hu0, hv0, dx, dy, dt = Autotuner.gen_test_data(nx=self.nx, ny=self.ny, g=g)
        arguments = {
            'context': context,
            'h0': h0, 'hu0': hu0, 'hv0': hv0,
            'nx': self.nx, 'ny': self.ny,
            'dx': dx, 'dy': dy, 'dt': 0.9*dt,
            'g': g
        }

        # Load existing data into memory
        benchmark_data = {
            "simulators": [],
        }
        if (os.path.isfile(self.filename)):
            with np.load(self.filename) as data:
                for k, v in data.items():
                    benchmark_data[k] = v

        # Run benchmark
        benchmark_data[key + "_megacells"] = Autotuner.benchmark_single_simulator(simulator, arguments, self.block_widths, self.block_heights)
        benchmark_data[key + "_block_widths"] = self.block_widths
        benchmark_data[key + "_block_heights"] = self.block_heights
        benchmark_data[key + "_arguments"] = str(arguments)

        existing_sims = benchmark_data["simulators"]
        if (isinstance(existing_sims, np.ndarray)):
            existing_sims = existing_sims.tolist()
        if (key not in existing_sims):
            benchmark_data["simulators"] = existing_sims + [key]

        # Save to file
        np.savez_compressed(self.filename, **benchmark_data)


    """
    Function which reads a numpy file with autotuning data
    and reports the maximum performance and block size
    """
    def get_peak_performance(self, simulator):
        logger = logging.getLogger(__name__)

        assert issubclass(simulator, Simulator.BaseSimulator)
        key = simulator.__name__

        if (key in self.performance):
            return self.performance[key]
        else:
            #Run simulation if required
            if (not os.path.isfile(self.filename)):
                logger.debug("Could not get autotuned peak performance for %s: benchmarking", key)
                self.benchmark(simulator)

            with np.load(self.filename) as data:
                if key not in data['simulators']:
                    logger.debug("Could not get autotuned peak performance for %s: benchmarking", key)
                    data.close()
                    self.benchmark(simulator)
                    data = np.load(self.filename)

                def find_max_index(megacells):
                    max_index = np.nanargmax(megacells)
                    return np.unravel_index(max_index, megacells.shape)

                megacells = data[key + '_megacells']
                block_widths = data[key + '_block_widths']
                block_heights = data[key + '_block_heights']
                j, i = find_max_index(megacells)

                self.performance[key] = { "block_width": block_widths[i],
                                          "block_height": block_heights[j],
                                          "megacells": megacells[j, i] }
                logger.debug("Returning %s as peak performance parameters", self.performance[key])
                return self.performance[key]

        #This should never happen
        raise RuntimeError("Something wrong: Could not get autotuning data!")
        return None


    """
    Runs a set of benchmarks for a single simulator
    """
    def benchmark_single_simulator(simulator, arguments, block_widths, block_heights):
        logger = logging.getLogger(__name__)

        megacells = np.empty((len(block_heights), len(block_widths)))
        megacells.fill(np.nan)

        logger.debug("Running %d benchmarks with %s", len(block_heights)*len(block_widths), simulator.__name__)

        sim_arguments = arguments.copy()

        with Common.Timer(simulator.__name__) as t:
            for j, block_height in enumerate(block_heights):
                sim_arguments.update({'block_height': block_height})
                for i, block_width in enumerate(block_widths):
                    sim_arguments.update({'block_width': block_width})
                    megacells[j, i] = Autotuner.run_benchmark(simulator, sim_arguments)

        logger.debug("Completed %s in %f seconds", simulator.__name__, t.secs)

        return megacells


    """
    Runs a benchmark, and returns the number of megacells achieved
    """
    def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
        logger = logging.getLogger(__name__)

        #Initialize simulator
        try:
            sim = simulator(**arguments)
        except:
            #An exception raised - not possible to continue
            logger.debug("Failed creating %s with arguments %s", simulator.__name__, str(arguments))
            return np.nan

        #Create timer events
        start = cuda.Event()
        end = cuda.Event()

        #Warmup
        for i in range(warmup_timesteps):
            sim.stepEuler(sim.dt)

        #Run simulation with timer
        start.record(sim.stream)
        for i in range(timesteps):
            sim.stepEuler(sim.dt)
        end.record(sim.stream)

        #Synchronize end event
        end.synchronize()

        #Compute megacells
        gpu_elapsed = end.time_since(start)*1.0e-3
        megacells = (sim.nx*sim.ny*timesteps / (1000*1000)) / gpu_elapsed

        #Sanity check solution
        h, hu, hv = sim.download()
        sane = True
        sane = sane and Autotuner.sanity_check(h, 0.3, 0.7)
        sane = sane and Autotuner.sanity_check(hu, -0.2, 0.2)
        sane = sane and Autotuner.sanity_check(hv, -0.2, 0.2)

        if (sane):
            logger.debug("%s [%d x %d] succeeded: %f megacells, gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], megacells, gpu_elapsed)
            return megacells
        else:
            logger.debug("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed)
            return np.nan


    """
    Generates test dataset
    """
    def gen_test_data(nx, ny, g):
        width = 100.0
        height = 100.0
        dx = width / float(nx)
        dy = height / float(ny)

        x_center = dx*nx/2.0
        y_center = dy*ny/2.0

        #Create a gaussian "dam break" that will not form shocks
        size = width / 5.0
        dt = 10**10

        h = np.zeros((ny, nx), dtype=np.float32)
        hu = np.zeros((ny, nx), dtype=np.float32)
        hv = np.zeros((ny, nx), dtype=np.float32)

        x = dx*(np.arange(0, nx, dtype=np.float32)+0.5) - x_center
        y = dy*(np.arange(0, ny, dtype=np.float32)+0.5) - y_center
        xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')
        r = np.sqrt(xv**2 + yv**2)
        xv = None
        yv = None
        gc.collect()

        #Generate highres
        h = 0.5 + 0.1*0.5*(1.0 + np.cos(np.pi*r/size)) * (r < size)
        hu = 0.1*0.5*(1.0 + np.cos(np.pi*r/size)) * (r < size)
        hv = 0.1*0.5*(1.0 + np.cos(np.pi*r/size)) * (r < size)

        scale = 0.7
        max_h_estimate = 0.6
        max_u_estimate = 0.1*np.sqrt(2.0)
        dx = width/nx
        dy = height/ny
        dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(g*max_h_estimate))

        return h, hu, hv, dx, dy, dt


    """
    Checks that a variable is "sane"
    """
    def sanity_check(variable, bound_min, bound_max):
        maxval = np.amax(variable)
        minval = np.amin(variable)
        if (np.isnan(maxval)
                or np.isnan(minval)
                or maxval > bound_max
                or minval < bound_min):
            return False
        else:
            return True
@@ -34,7 +34,7 @@ import pycuda.compiler as cuda_compiler
 import pycuda.gpuarray
 import pycuda.driver as cuda
 
-
+from GPUSimulators import Autotuner
 
 """
 Class which keeps track of time spent for a section of code
@@ -64,8 +64,7 @@ Class which keeps track of the CUDA context and some helper functions
 """
 class CudaContext(object):
     
-    def __init__(self, verbose=True, blocking=False, use_cache=True):
-        self.verbose = verbose
+    def __init__(self, blocking=False, use_cache=True, autotuning=True):
         self.blocking = blocking
         self.use_cache = use_cache
         self.logger = logging.getLogger(__name__)
@@ -76,12 +75,13 @@ class CudaContext(object):
         #Initialize cuda (must be first call to PyCUDA)
         cuda.init(flags=0)
         
+        self.logger.info("PyCUDA version %s", str(pycuda.VERSION_TEXT))
         
         #Print some info about CUDA
         self.logger.info("CUDA version %s", str(cuda.get_version()))
         self.logger.info("Driver version %s", str(cuda.get_driver_version()))
 
         self.cuda_device = cuda.Device(0)
-        if (self.verbose):
         self.logger.info("Using '%s' GPU", self.cuda_device.name())
         self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))
         self.logger.debug(" => memory: %d MB", self.cuda_device.total_memory() / (1024*1024))
@@ -102,6 +102,11 @@ class CudaContext(object):
             os.mkdir(self.cache_path)
         self.logger.debug("Using CUDA cache dir %s", self.cache_path)
         
+        self.autotuner = None
+        if (autotuning):
+            self.logger.info("Autotuning enabled. It may take several minutes to run the code the first time: have patience")
+            self.autotuner = Autotuner.Autotuner()
+        
     
     def __del__(self, *args):
         self.logger.info("Cleaning up CUDA context handle <%s>", str(self.cuda_context.handle))
@@ -131,7 +136,7 @@ class CudaContext(object):
         return "CudaContext id " + str(self.cuda_context.handle)
     
     
-    def hash_kernel(kernel_filename, include_dirs, verbose=False):
+    def hash_kernel(kernel_filename, include_dirs):
         # Generate a kernel ID for our caches
         num_includes = 0
         max_includes = 100
@@ -147,7 +152,7 @@ class CudaContext(object):
 
             filename = files.pop()
             
-            logger.debug("Hashing %s", filename)
+            #logger.debug("Hashing %s", filename)
             
             modified = os.path.getmtime(filename)
             
@@ -183,7 +188,7 @@ class CudaContext(object):
     """
     def get_prepared_kernel(self, kernel_filename, kernel_function_name, \
                     prepared_call_args, \
-                    include_dirs=[], verbose=False, no_extern_c=True,
+                    include_dirs=[], no_extern_c=True,
                     **kwargs):
         """
         Helper function to print compilation output
@@ -195,7 +200,7 @@ class CudaContext(object):
             if error_str:
                 self.logger.debug("Error: %s", error_str)
         
-        self.logger.debug("Getting %s", kernel_filename)
+        #self.logger.debug("Getting %s", kernel_filename)
         
         # Create a hash of the kernel (and its includes)
         kwargs_hasher = hashlib.md5()
@@ -206,8 +211,7 @@ class CudaContext(object):
         kernel_hash = root \
                 + "_" + CudaContext.hash_kernel( \
                     os.path.join(self.module_path, kernel_filename), \
-                    include_dirs=[self.module_path] + include_dirs, \
-                    verbose=verbose) \
+                    include_dirs=[self.module_path] + include_dirs) \
                 + "_" + kwargs_hash \
                 + ext
         cached_kernel_filename = os.path.join(self.cache_path, kernel_hash)
@@ -278,55 +282,102 @@ class CudaContext(object):
 
 
 
 """
 Class that holds data
 """
-class CUDAArray2D:
+class CudaArray2D:
     """
     Uploads initial data to the CL device
    """
-    def __init__(self, stream, nx, ny, halo_x, halo_y, data):
+    def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data):
        self.logger = logging.getLogger(__name__)
        self.nx = nx
        self.ny = ny
-        self.nx_halo = nx + 2*halo_x
-        self.ny_halo = ny + 2*halo_y
+        self.x_halo = x_halo
+        self.y_halo = y_halo
        
-        self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
+        nx_halo = nx + 2*x_halo
+        ny_halo = ny + 2*y_halo
+        
+        #self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
        
        #Make sure data is in proper format
-        assert np.issubdtype(data.dtype, np.float32), "Wrong datatype: %s" % str(data.dtype)
-        assert not np.isfortran(data), "Wrong datatype (Fortran, expected C)"
-        assert data.shape == (self.ny_halo, self.nx_halo), "Wrong data shape: %s vs %s" % (str(data.shape), str((self.ny_halo, self.nx_halo)))
+        assert np.issubdtype(cpu_data.dtype, np.float32), "Wrong datatype: %s" % str(cpu_data.dtype)
+        assert cpu_data.itemsize == 4, "Wrong size of data type"
+        assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"
 
        #Upload data to the device
-        self.data = pycuda.gpuarray.to_gpu_async(data, stream=stream)
-        
-        self.bytes_per_float = data.itemsize
-        assert(self.bytes_per_float == 4)
-        self.pitch = np.int32((self.nx_halo)*self.bytes_per_float)
-        self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
+        if (cpu_data.shape == (ny_halo, nx_halo)):
+            self.data = pycuda.gpuarray.to_gpu_async(cpu_data, stream=stream)
+        elif (cpu_data.shape == (self.ny, self.nx)):
+            #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
+            self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), cpu_data.dtype)
+            #self.data.fill(0.0)
+            
+            #Create copy object from host to device
+            copy = cuda.Memcpy2D()
+            copy.set_src_host(cpu_data)
+            copy.set_dst_device(self.data.gpudata)
+            
+            #Set offsets and pitch of destination
+            copy.dst_x_in_bytes = self.x_halo*self.data.strides[1]
+            copy.dst_y = self.y_halo
+            copy.dst_pitch = self.data.strides[0]
+            
+            #Set width in bytes to copy for each row and
+            #number of rows to copy
+            copy.width_in_bytes = self.nx*cpu_data.itemsize
+            copy.height = self.ny
+            
+            #Perform the copy
+            copy(stream)
+            stream.synchronize()
+            
+        else:
+            assert False, "Wrong data shape: %s vs %s / %s" % (str(cpu_data.shape), str((self.ny, self.nx)), str((ny_halo, nx_halo)))
+        
+        #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
        
        
    def __del__(self, *args):
-        self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
+        #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
        self.data.gpudata.free()
        self.data = None
        
    """
-    Enables downloading data from CL device to Python
+    Enables downloading data from GPU to Python
    """
    def download(self, stream, async=False):
-        #Copy data from device to host
-        if (async):
-            self.logger.debug("Buffer <%s> [%dx%d]: Downloading async ", int(self.data.gpudata), self.nx, self.ny)
-            host_data = self.data.get_async(stream=stream)
-            return host_data
-        else:
-            self.logger.debug("Buffer <%s> [%dx%d]: Downloading synchronously", int(self.data.gpudata), self.nx, self.ny)
-            host_data = self.data.get(stream=stream)#, pagelocked=True) # pagelocked causes crash on windows at least
-            return host_data
+        #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
+        #Allocate host memory
+        #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
+        cpu_data = np.empty((self.ny, self.nx), dtype=np.float32)
+        
+        #Create copy object from device to host
+        copy = cuda.Memcpy2D()
+        copy.set_src_device(self.data.gpudata)
+        copy.set_dst_host(cpu_data)
+        
+        #Set offsets and pitch of source
+        copy.src_x_in_bytes = self.x_halo*self.data.strides[1]
+        copy.src_y = self.y_halo
+        copy.src_pitch = self.data.strides[0]
+        
+        #Set width in bytes to copy for each row and
+        #number of rows to copy
+        copy.width_in_bytes = self.nx*cpu_data.itemsize
+        copy.height = self.ny
+        
+        copy(stream)
+        if async==False:
+            stream.synchronize()
+        
+        return cpu_data
@@ -344,13 +395,13 @@ class SWEDataArakawaA:
     Uploads initial data to the CL device
     """
     def __init__(self, stream, nx, ny, halo_x, halo_y, h0, hu0, hv0):
-        self.h0 = CUDAArray2D(stream, nx, ny, halo_x, halo_y, h0)
-        self.hu0 = CUDAArray2D(stream, nx, ny, halo_x, halo_y, hu0)
-        self.hv0 = CUDAArray2D(stream, nx, ny, halo_x, halo_y, hv0)
+        self.h0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
+        self.hu0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
+        self.hv0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
        
-        self.h1 = CUDAArray2D(stream, nx, ny, halo_x, halo_y, h0)
-        self.hu1 = CUDAArray2D(stream, nx, ny, halo_x, halo_y, hu0)
-        self.hv1 = CUDAArray2D(stream, nx, ny, halo_x, halo_y, hv0)
+        self.h1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
+        self.hu1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
+        self.hv1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
        
     """
     Swaps the variables after a timestep has been completed
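The rewritten CudaArray2D accepts either a pre-padded (halo included) array, which is uploaded directly with to_gpu_async, or an interior-only array, which is copied into a padded device buffer with cuda.Memcpy2D. A hedged sketch of the two call paths (hypothetical, not part of the commit; assumes a CUDA-capable GPU and that the CudaContext constructor creates and pushes the context):

# Hypothetical sketch of the two upload shapes the new CudaArray2D accepts.
import numpy as np
import pycuda.driver as cuda
from GPUSimulators import Common

ctx = Common.CudaContext(autotuning=False)      # context without autotuning for this sketch
stream = cuda.Stream()
nx, ny, halo = 64, 64, 1

padded   = np.zeros((ny + 2*halo, nx + 2*halo), dtype=np.float32)  # uploaded as-is
interior = np.zeros((ny, nx), dtype=np.float32)                    # copied into a padded buffer

a = Common.CudaArray2D(stream, nx, ny, halo, halo, padded)
b = Common.CudaArray2D(stream, nx, ny, halo, halo, interior)
h = b.download(stream)          # returns the (ny, nx) interior as a numpy array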
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 """
 
 #Import packages we need
-from SWESimulators import Simulator
+from GPUSimulators import Simulator
 
 
 
@@ -70,8 +70,8 @@ class FORCE (Simulator.BaseSimulator):
         #Get kernels
         self.kernel = context.get_prepared_kernel("FORCE_kernel.cu", "FORCEKernel", \
                 "iiffffPiPiPiPiPiPi", \
-                BLOCK_WIDTH=block_width, \
-                BLOCK_HEIGHT=block_height)
+                BLOCK_WIDTH=self.local_size[0], \
+                BLOCK_HEIGHT=self.local_size[1])
     
     def __str__(self):
         return "First order centered"
@@ -84,12 +84,12 @@ class FORCE (Simulator.BaseSimulator):
                 self.nx, self.ny, \
                 self.dx, self.dy, dt, \
                 self.g, \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt
         
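Every kernel launch in this commit now passes data.strides[0] where it used to pass the precomputed pitch. For a C-contiguous float32 array the two are the same number of bytes per row, which this small, purely illustrative numpy check shows:

# Illustrative only: the row stride in bytes equals the old pitch computation.
import numpy as np

nx_halo, ny_halo = 1024 + 2, 1024 + 2
a = np.zeros((ny_halo, nx_halo), dtype=np.float32)
assert a.strides[0] == nx_halo * a.itemsize   # itemsize is 4 for float32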
@@ -20,7 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 """
 
 #Import packages we need
-from SWESimulators import Simulator
+from GPUSimulators import Simulator
 
 
 
@@ -65,8 +65,8 @@ class HLL (Simulator.BaseSimulator):
         #Get kernels
         self.kernel = context.get_prepared_kernel("HLL_kernel.cu", "HLLKernel", \
                 "iiffffPiPiPiPiPiPi", \
-                BLOCK_WIDTH=block_width, \
-                BLOCK_HEIGHT=block_height)
+                BLOCK_WIDTH=self.local_size[0], \
+                BLOCK_HEIGHT=self.local_size[1])
     
     def __str__(self):
         return "Harten-Lax-van Leer"
@@ -79,12 +79,12 @@ class HLL (Simulator.BaseSimulator):
                 self.nx, self.ny, \
                 self.dx, self.dy, dt, \
                 self.g, \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt
         
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 #Import packages we need
 import numpy as np
-from SWESimulators import Simulator
+from GPUSimulators import Simulator
 
 
 
@@ -71,8 +71,8 @@ class HLL2 (Simulator.BaseSimulator):
         #Get kernels
         self.kernel = context.get_prepared_kernel("HLL2_kernel.cu", "HLL2Kernel", \
                 "iifffffiPiPiPiPiPiPi", \
-                BLOCK_WIDTH=block_width, \
-                BLOCK_HEIGHT=block_height)
+                BLOCK_WIDTH=self.local_size[0], \
+                BLOCK_HEIGHT=self.local_size[1])
     
     def __str__(self):
         return "Harten-Lax-van Leer (2nd order)"
@@ -90,12 +90,12 @@ class HLL2 (Simulator.BaseSimulator):
                 self.g, \
                 self.theta, \
                 np.int32(0), \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt
         
@@ -106,12 +106,12 @@ class HLL2 (Simulator.BaseSimulator):
                 self.g, \
                 self.theta, \
                 np.int32(1), \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt
         
@@ -21,13 +21,15 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 import logging
 
+from IPython.core import magic_arguments
 from IPython.core.magic import line_magic, Magics, magics_class
 import pycuda.driver as cuda
 
+from GPUSimulators import Common
 
 
 @magics_class
-class CudaContextHandler(Magics):
+class MyIPythonMagic(Magics):
     @line_magic
     def cuda_context_handler(self, context_name):
         self.logger = logging.getLogger(__name__)
@@ -39,7 +41,8 @@ class CudaContextHandler(Magics):
             return
         else:
             self.logger.debug("Creating context")
-            self.shell.ex(context_name + " = Common.CudaContext(verbose=True, blocking=False)")
+            #self.shell.ex(context_name + " = Common.CudaContext(blocking=False)")
+            self.shell.user_ns[context_name] = Common.CudaContext(blocking=False)
         
         # this function will be called on exceptions in any cell
         def custom_exc(shell, etype, evalue, tb, tb_offset=None):
@@ -51,11 +54,14 @@ class CudaContextHandler(Magics):
 
             if context_name in self.shell.user_ns.keys():
                 self.logger.info("Pushing <%s>", str(self.shell.user_ns[context_name].cuda_context.handle))
-                self.shell.ex(context_name + ".cuda_context.push()")
+                #self.shell.ex(context_name + ".cuda_context.push()")
+                self.shell.user_ns[context_name].cuda_context.push()
             else:
                 self.logger.error("No CUDA context called %s found (something is wrong)", context_name)
                 self.logger.error("CUDA will not work now")
             
+            self.logger.debug("==================================================================")
             
             # still show the error within the notebook, don't just swallow it
             shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset)
 
@@ -71,10 +77,42 @@ class CudaContextHandler(Magics):
             context = cuda.Context.get_current()
             self.logger.info("`-> Popping <%s>", str(context.handle))
             cuda.Context.pop()
+            self.logger.debug("==================================================================")
         atexit.register(exitfunc)
         
-logger = logging.getLogger(__name__)
-logger.info("Registering automatic CUDA context handling")
-logger.debug("(use %cuda_context_handler my_context to create a context called my_context")
+    @line_magic
+    @magic_arguments.magic_arguments()
+    @magic_arguments.argument(
+        '--out', '-o', type=str, default='output.log', help='The filename to store the log to')
+    @magic_arguments.argument(
+        '--level', '-l', type=int, default=20, help='The level of logging to screen [0, 50]')
+    @magic_arguments.argument(
+        '--file_level', '-f', type=int, default=10, help='The level of logging to file [0, 50]')
+    def setup_logging(self, line):
+        args = magic_arguments.parse_argstring(self.setup_logging, line)
+        import sys
+        
+        #Get root logger
+        logger = logging.getLogger('')
+        logger.setLevel(min(args.level, args.file_level))
+        
+        #Add log to screen
+        ch = logging.StreamHandler()
+        ch.setLevel(args.level)
+        logger.addHandler(ch)
+        logger.log(args.level, "Console logger using level %s", logging.getLevelName(args.level))
+        
+        #Add log to file
+        logger.log(args.level, "File logger using level %s to %s", logging.getLevelName(args.file_level), args.out)
+        fh = logging.FileHandler(args.out)
+        formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s: %(message)s')
+        fh.setFormatter(formatter)
+        fh.setLevel(args.file_level)
+        logger.addHandler(fh)
+        
+        logger.info("Python version %s", sys.version)
+
+# Register
 ip = get_ipython()
-ip.register_magics(CudaContextHandler)
+ip.register_magics(MyIPythonMagic)
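In a notebook, the renamed magic class exposes two line magics: %cuda_context_handler (unchanged) and the new %setup_logging. A hypothetical cell, assuming the module has been imported or %run so that ip.register_magics(MyIPythonMagic) has already executed:

# Hypothetical IPython cell, not part of the commit.
%setup_logging --level 20 --file_level 10 --out output.log
%cuda_context_handler my_context
# after the second line, my_context is a Common.CudaContext in the user namespace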
@@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 #Import packages we need
 import numpy as np
-from SWESimulators import Simulator
+from GPUSimulators import Simulator
 
 
 
@@ -72,8 +72,8 @@ class KP07 (Simulator.BaseSimulator):
         #Get kernels
         self.kernel = context.get_prepared_kernel("KP07_kernel.cu", "KP07Kernel", \
                 "iifffffiPiPiPiPiPiPi", \
-                BLOCK_WIDTH=block_width, \
-                BLOCK_HEIGHT=block_height)
+                BLOCK_WIDTH=self.local_size[0], \
+                BLOCK_HEIGHT=self.local_size[1])
     
     def __str__(self):
         return "Kurganov-Petrova 2007"
@@ -88,12 +88,12 @@ class KP07 (Simulator.BaseSimulator):
                 self.g, \
                 self.theta, \
                 np.int32(substep), \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
     
     def stepEuler(self, dt):
@@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 #Import packages we need
 import numpy as np
-from SWESimulators import Simulator
+from GPUSimulators import Simulator
 
 
 
@@ -72,8 +72,8 @@ class KP07_dimsplit (Simulator.BaseSimulator):
         #Get kernels
         self.kernel = context.get_prepared_kernel("KP07_dimsplit_kernel.cu", "KP07DimsplitKernel", \
                 "iifffffiPiPiPiPiPiPi", \
-                BLOCK_WIDTH=block_width, \
-                BLOCK_HEIGHT=block_height)
+                BLOCK_WIDTH=self.local_size[0], \
+                BLOCK_HEIGHT=self.local_size[1])
     
     def __str__(self):
         return "Kurganov-Petrova 2007 dimensionally split"
@@ -91,12 +91,12 @@ class KP07_dimsplit (Simulator.BaseSimulator):
                 self.g, \
                 self.theta, \
                 np.int32(0), \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt
         
@@ -107,12 +107,12 @@ class KP07_dimsplit (Simulator.BaseSimulator):
                 self.g, \
                 self.theta, \
                 np.int32(1), \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt
         
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 """
 
 #Import packages we need
-from SWESimulators import Simulator
+from GPUSimulators import Simulator
 
 
 
@@ -66,8 +66,8 @@ class LxF (Simulator.BaseSimulator):
         # Get kernels
         self.kernel = context.get_prepared_kernel("LxF_kernel.cu", "LxFKernel", \
                 "iiffffPiPiPiPiPiPi", \
-                BLOCK_WIDTH=block_width, \
-                BLOCK_HEIGHT=block_height)
+                BLOCK_WIDTH=self.local_size[0], \
+                BLOCK_HEIGHT=self.local_size[1])
     
     def __str__(self):
         return "Lax Friedrichs"
@@ -80,12 +80,12 @@ class LxF (Simulator.BaseSimulator):
                 self.nx, self.ny, \
                 self.dx, self.dy, dt, \
                 self.g, \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt
         
@@ -28,7 +28,7 @@ import pycuda.compiler as cuda_compiler
 import pycuda.gpuarray
 import pycuda.driver as cuda
 
-from SWESimulators import Common
+from GPUSimulators import Common
 
 
 class BaseSimulator:
@@ -57,6 +57,14 @@ class BaseSimulator:
         #Get logger
         self.logger = logging.getLogger(__name__ + "." + self.__class__.__name__)
         
+        self.context = context
+        
+        if (self.context.autotuner):
+            peak_configuration = self.context.autotuner.get_peak_performance(self.__class__)
+            block_width = int(peak_configuration["block_width"])
+            block_height = int(peak_configuration["block_height"])
+            self.logger.debug("Used autotuning to get block size [%d x %d]", block_width, block_height)
+        
         #Create a CUDA stream
         self.stream = cuda.Stream()
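With this change, a context created with autotuning enabled is all a simulator needs in order to pick its block size: BaseSimulator asks the context's autotuner for the peak configuration of its own class. A hedged sketch (hypothetical, not part of the commit; assumes a CUDA-capable GPU, and the first run may take several minutes while the benchmarks execute):

# Hypothetical sketch, not part of the commit.
from GPUSimulators import Common

ctx = Common.CudaContext(autotuning=True)   # builds or loads autotuning_data_<hostname>.npz
# Any Simulator.BaseSimulator subclass constructed with context=ctx will call
# ctx.autotuner.get_peak_performance(<scheme class>) and use that block size.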
@@ -22,7 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 #Import packages we need
 import numpy as np
-from SWESimulators import Simulator
+from GPUSimulators import Simulator
 
 
 
@@ -65,8 +65,8 @@ class WAF (Simulator.BaseSimulator):
         #Get kernels
         self.kernel = context.get_prepared_kernel("WAF_kernel.cu", "WAFKernel", \
                 "iiffffiPiPiPiPiPiPi", \
-                BLOCK_WIDTH=block_width, \
-                BLOCK_HEIGHT=block_height)
+                BLOCK_WIDTH=self.local_size[0], \
+                BLOCK_HEIGHT=self.local_size[1])
     
     def __str__(self):
         return "Weighted average flux"
@@ -83,12 +83,12 @@ class WAF (Simulator.BaseSimulator):
                 self.dx, self.dy, dt, \
                 self.g, \
                 np.int32(0), \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt
         
@@ -98,11 +98,11 @@ class WAF (Simulator.BaseSimulator):
                 self.dx, self.dy, dt, \
                 self.g, \
                 np.int32(1), \
-                self.data.h0.data.gpudata, self.data.h0.pitch, \
-                self.data.hu0.data.gpudata, self.data.hu0.pitch, \
-                self.data.hv0.data.gpudata, self.data.hv0.pitch, \
-                self.data.h1.data.gpudata, self.data.h1.pitch, \
-                self.data.hu1.data.gpudata, self.data.hu1.pitch, \
-                self.data.hv1.data.gpudata, self.data.hv1.pitch)
+                self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
+                self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
+                self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
+                self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
+                self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
+                self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
         self.data.swap()
         self.t += dt