Pinned memory

2025-05-18 06:24:13 +02:00 · 2018-12-10 12:08:10 +01:00 · 2018-12-10 12:08:10 +01:00 · 2c6ecc8d88
commit 2c6ecc8d88
parent 12174b39db
2 changed files with 18 additions and 4 deletions
--- a/GPUSimulators/Common.py
+++ b/GPUSimulators/Common.py
@@ -38,6 +38,7 @@ import json
 import pycuda.compiler as cuda_compiler
 import pycuda.gpuarray
 import pycuda.driver as cuda
 from pycuda.tools import PageLockedMemoryPool
@@ -482,6 +483,9 @@ class CudaArray2D:
        #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
        self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype)
        #For returning to download
        self.memorypool = PageLockedMemoryPool()
        #If we don't have any data, just allocate and return
        if cpu_data is None:
            return
@@ -518,8 +522,10 @@ class CudaArray2D:
        if (cpu_data is None):
            #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
            #Allocate host memory
-            #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
+            #The following fails, don't know why (crashes python)
-            cpu_data = np.empty((ny, nx), dtype=np.float32)
+            #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
            #Non-pagelocked: cpu_data = np.empty((ny, nx), dtype=np.float32)
            cpu_data = self.memorypool.allocate((ny, nx), dtype=np.float32)
        assert nx == cpu_data.shape[1]
        assert ny == cpu_data.shape[0]
@@ -610,6 +616,9 @@ class CudaArray3D:
        #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
        self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype)
        #For returning to download
        self.memorypool = PageLockedMemoryPool()
        #If we don't have any data, just allocate and return
        if cpu_data is None:
            return
@@ -662,7 +671,8 @@ class CudaArray3D:
        #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
        #Allocate host memory
        #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
-        cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32)
+        #cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32)
        cpu_data = self.memorypool.allocate((self.nz, self.ny, self.nx), dtype=np.float32)
        #Create copy object from device to host
        copy = cuda.Memcpy2D()
--- a/GPUSimulators/IPythonMagic.py
+++ b/GPUSimulators/IPythonMagic.py
@@ -47,6 +47,10 @@ class MagicCudaContext(Magics):
        self.logger.info("Registering %s in user workspace", args.name)
        context_flags = None
        if (args.blocking):
            context_flags = cuda.ctx_flags.SCHED_BLOCKING_SYNC
        if args.name in self.shell.user_ns.keys():
            self.logger.debug("Context already registered! Ignoring")
            return
@@ -54,7 +58,7 @@ class MagicCudaContext(Magics):
            self.logger.debug("Creating context")
            use_cache = False if args.no_cache else True
            use_autotuning = False if args.no_autotuning else True
-            self.shell.user_ns[args.name] = CudaContext.CudaContext(blocking=args.blocking, use_cache=use_cache, autotuning=use_autotuning)
+            self.shell.user_ns[args.name] = CudaContext.CudaContext(context_flags=context_flags, use_cache=use_cache, autotuning=use_autotuning)
        # this function will be called on exceptions in any cell
        def custom_exc(shell, etype, evalue, tb, tb_offset=None):