From 2c6ecc8d886ef8466fae226aa15fbed2f37295ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20R=2E=20Brodtkorb?= Date: Mon, 10 Dec 2018 12:08:10 +0100 Subject: [PATCH] Pinned memory --- GPUSimulators/Common.py | 16 +++++++++++++--- GPUSimulators/IPythonMagic.py | 6 +++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/GPUSimulators/Common.py b/GPUSimulators/Common.py index ddc7701..5e8fd24 100644 --- a/GPUSimulators/Common.py +++ b/GPUSimulators/Common.py @@ -38,6 +38,7 @@ import json import pycuda.compiler as cuda_compiler import pycuda.gpuarray import pycuda.driver as cuda +from pycuda.tools import PageLockedMemoryPool @@ -482,6 +483,9 @@ class CudaArray2D: #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype) + #For returning to download + self.memorypool = PageLockedMemoryPool() + #If we don't have any data, just allocate and return if cpu_data is None: return @@ -518,8 +522,10 @@ class CudaArray2D: if (cpu_data is None): #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny) #Allocate host memory - #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32) - cpu_data = np.empty((ny, nx), dtype=np.float32) + #The following fails, don't know why (crashes python) + #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32) + #Non-pagelocked: cpu_data = np.empty((ny, nx), dtype=np.float32) + cpu_data = self.memorypool.allocate((ny, nx), dtype=np.float32) assert nx == cpu_data.shape[1] assert ny == cpu_data.shape[0] @@ -610,6 +616,9 @@ class CudaArray3D: #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype) + #For returning to download + self.memorypool = PageLockedMemoryPool() + #If we don't have any data, just allocate and return if cpu_data is None: return @@ -662,7 +671,8 @@ class CudaArray3D: #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny) 
#Allocate host memory #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32) - cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32) + #cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32) + cpu_data = self.memorypool.allocate((self.nz, self.ny, self.nx), dtype=np.float32) #Create copy object from device to host copy = cuda.Memcpy2D() diff --git a/GPUSimulators/IPythonMagic.py b/GPUSimulators/IPythonMagic.py index 2cca8c1..fa452df 100644 --- a/GPUSimulators/IPythonMagic.py +++ b/GPUSimulators/IPythonMagic.py @@ -47,6 +47,10 @@ class MagicCudaContext(Magics): self.logger.info("Registering %s in user workspace", args.name) + context_flags = None + if (args.blocking): + context_flags = cuda.ctx_flags.SCHED_BLOCKING_SYNC + if args.name in self.shell.user_ns.keys(): self.logger.debug("Context already registered! Ignoring") return @@ -54,7 +58,7 @@ class MagicCudaContext(Magics): self.logger.debug("Creating context") use_cache = False if args.no_cache else True use_autotuning = False if args.no_autotuning else True - self.shell.user_ns[args.name] = CudaContext.CudaContext(blocking=args.blocking, use_cache=use_cache, autotuning=use_autotuning) + self.shell.user_ns[args.name] = CudaContext.CudaContext(context_flags=context_flags, use_cache=use_cache, autotuning=use_autotuning) # this function will be called on exceptions in any cell def custom_exc(shell, etype, evalue, tb, tb_offset=None):