Merge pull request #6 from babrodtk/autotuning

Updated domain size benchmark in autotuning
André R. Brodtkorb 2018-08-23 16:06:35 +02:00 committed by GitHub
commit 1ff3485918
5 changed files with 303 additions and 17 deletions

File diff suppressed because one or more lines are too long


@@ -29,7 +29,7 @@ from socket import gethostname
 import pycuda.driver as cuda
-from GPUSimulators import Common, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF
+from GPUSimulators import Common, Simulator
 class Autotuner:
     def __init__(self,
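Note: the tightened import list means the autotuner no longer pulls in each numerical scheme (LxF, FORCE, HLL, ...) at import time. As a rough illustration of what a domain-size benchmark does, the sketch below times a dummy memory-bound operation for several domain sizes using CUDA events; the function name, the operation, and the sizes are hypothetical stand-ins, not the repository's Autotuner code.

    import numpy as np
    import pycuda.autoinit  # creates a CUDA context on the default device
    import pycuda.driver as cuda
    import pycuda.gpuarray as gpuarray

    def time_domain_size(nx, ny, iterations=10):
        # Upload an nx-by-ny single-precision field once, outside the timed region
        data = gpuarray.to_gpu(np.random.rand(ny, nx).astype(np.float32))
        start, end = cuda.Event(), cuda.Event()
        start.record()
        for _ in range(iterations):
            data = data * 0.5  # stand-in for one simulator step
        end.record()
        end.synchronize()
        return start.time_till(end) / iterations  # milliseconds per step

    for n in [256, 512, 1024, 2048]:
        print("%4d x %4d: %.3f ms" % (n, n, time_domain_size(n, n)))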


@@ -100,7 +100,7 @@ class CudaContext(object):
         self.cache_path = os.path.join(self.module_path, "cuda_cache")
         if not os.path.isdir(self.cache_path):
             os.mkdir(self.cache_path)
-        self.logger.debug("Using CUDA cache dir %s", self.cache_path)
+        self.logger.info("Using CUDA cache dir %s", self.cache_path)
         self.autotuner = None
         if (autotuning):
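The cache directory above backs the kernel-caching switch exposed further down (--no_cache). A plausible minimal sketch of such a cache, assuming compiled kernels are keyed by a hash of their source text (the helper and its names are illustrative, not the repository's API):

    import hashlib
    import os

    def cached_kernel_path(cache_path, kernel_source):
        # Same guard as the os.mkdir above, but race-free
        os.makedirs(cache_path, exist_ok=True)
        # Key the compiled binary by the source text so edits force a rebuild
        digest = hashlib.md5(kernel_source.encode("utf-8")).hexdigest()
        return os.path.join(cache_path, digest + ".cubin")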
@@ -395,6 +395,7 @@ class SWEDataArakawaA:
     Uploads initial data to the CL device
     """
     def __init__(self, stream, nx, ny, halo_x, halo_y, h0, hu0, hv0):
+        self.logger = logging.getLogger(__name__)
         self.h0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
         self.hu0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
         self.hv0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
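The single added line gives the data class its own logger; getLogger(__name__) namespaces records by module, so verbosity can be tuned per module from the application side. A self-contained sketch of the pattern (the class name is illustrative):

    import logging

    class SWEDataExample:
        def __init__(self):
            self.logger = logging.getLogger(__name__)  # one logger per module
            self.logger.debug("Allocating GPU arrays")

    logging.basicConfig(level=logging.DEBUG)
    SWEDataExample()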


@@ -31,33 +31,43 @@ from GPUSimulators import Common
 @magics_class
 class MyIPythonMagic(Magics):
     @line_magic
-    def cuda_context_handler(self, context_name):
+    @magic_arguments.magic_arguments()
+    @magic_arguments.argument(
+        'name', type=str, help='Name of context to create')
+    @magic_arguments.argument(
+        '--blocking', '-b', action="store_true", help='Enable blocking context')
+    @magic_arguments.argument(
+        '--no_cache', '-nc', action="store_true", help='Disable caching of kernels')
+    @magic_arguments.argument(
+        '--no_autotuning', '-na', action="store_true", help='Disable autotuning of kernels')
+    def cuda_context_handler(self, line):
+        args = magic_arguments.parse_argstring(self.cuda_context_handler, line)
         self.logger = logging.getLogger(__name__)
-        self.logger.debug("Registering %s as a global context", context_name)
+        self.logger.info("Registering %s in user workspace", args.name)
-        if context_name in self.shell.user_ns.keys():
+        if args.name in self.shell.user_ns.keys():
             self.logger.debug("Context already registered! Ignoring")
             return
         else:
             self.logger.debug("Creating context")
-            #self.shell.ex(context_name + " = Common.CudaContext(blocking=False)")
-            self.shell.user_ns[context_name] = Common.CudaContext(blocking=False)
+            use_cache = False if args.no_cache else True
+            use_autotuning = False if args.no_autotuning else True
+            self.shell.user_ns[args.name] = Common.CudaContext(blocking=args.blocking, use_cache=use_cache, autotuning=use_autotuning)
         # this function will be called on exceptions in any cell
         def custom_exc(shell, etype, evalue, tb, tb_offset=None):
-            self.logger.exception("Exception caught: Resetting to CUDA context %s", context_name)
+            self.logger.exception("Exception caught: Resetting to CUDA context %s", args.name)
             while (cuda.Context.get_current() != None):
                 context = cuda.Context.get_current()
                 self.logger.info("Popping <%s>", str(context.handle))
                 cuda.Context.pop()
-            if context_name in self.shell.user_ns.keys():
-                self.logger.info("Pushing <%s>", str(self.shell.user_ns[context_name].cuda_context.handle))
-                #self.shell.ex(context_name + ".cuda_context.push()")
-                self.shell.user_ns[context_name].cuda_context.push()
+            if args.name in self.shell.user_ns.keys():
+                self.logger.info("Pushing <%s>", str(self.shell.user_ns[args.name].cuda_context.handle))
+                self.shell.user_ns[args.name].cuda_context.push()
             else:
-                self.logger.error("No CUDA context called %s found (something is wrong)", context_name)
+                self.logger.error("No CUDA context called %s found (something is wrong)", args.name)
                 self.logger.error("CUDA will not work now")
                 self.logger.debug("==================================================================")
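With parse_argstring in place, the magic accepts a positional context name plus the three flags declared above. Typical notebook usage would look something like this (the context name is arbitrary):

    # Create a context bound to the notebook variable my_context:
    %cuda_context_handler my_context

    # Or, in a fresh kernel, create a blocking context without caching or autotuning:
    %cuda_context_handler --blocking --no_cache --no_autotuning my_context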


@@ -69,6 +69,8 @@ class BaseSimulator:
         self.stream = cuda.Stream()
         #Create data by uploading to device
+        free, total = cuda.mem_get_info()
+        self.logger.debug("GPU memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
         self.data = Common.SWEDataArakawaA(self.stream, \
                 nx, ny, \
                 ghost_cells_x, ghost_cells_y, \
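For reference, cuda.mem_get_info() wraps cuMemGetInfo and returns (free, total) in bytes for the current context, which is why the logging above divides by 1024*1024. A standalone sketch:

    import pycuda.autoinit  # makes a context current on the default device
    import pycuda.driver as cuda

    free, total = cuda.mem_get_info()  # both values in bytes
    print("GPU memory: %d / %d MB available" % (free // (1024 * 1024), total // (1024 * 1024)))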