mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 06:24:13 +02:00
Pinned memory
This commit is contained in:
parent
12174b39db
commit
2c6ecc8d88
@ -38,6 +38,7 @@ import json
|
|||||||
import pycuda.compiler as cuda_compiler
|
import pycuda.compiler as cuda_compiler
|
||||||
import pycuda.gpuarray
|
import pycuda.gpuarray
|
||||||
import pycuda.driver as cuda
|
import pycuda.driver as cuda
|
||||||
|
from pycuda.tools import PageLockedMemoryPool
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -482,6 +483,9 @@ class CudaArray2D:
|
|||||||
#Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
#Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
||||||
self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype)
|
self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype)
|
||||||
|
|
||||||
|
#For returning to download
|
||||||
|
self.memorypool = PageLockedMemoryPool()
|
||||||
|
|
||||||
#If we don't have any data, just allocate and return
|
#If we don't have any data, just allocate and return
|
||||||
if cpu_data is None:
|
if cpu_data is None:
|
||||||
return
|
return
|
||||||
@ -518,8 +522,10 @@ class CudaArray2D:
|
|||||||
if (cpu_data is None):
|
if (cpu_data is None):
|
||||||
#self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
|
#self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
|
||||||
#Allocate host memory
|
#Allocate host memory
|
||||||
#cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
|
#The following fails, don't know why (crashes python)
|
||||||
cpu_data = np.empty((ny, nx), dtype=np.float32)
|
#cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
|
||||||
|
#Non-pagelocked: cpu_data = np.empty((ny, nx), dtype=np.float32)
|
||||||
|
cpu_data = self.memorypool.allocate((ny, nx), dtype=np.float32)
|
||||||
|
|
||||||
assert nx == cpu_data.shape[1]
|
assert nx == cpu_data.shape[1]
|
||||||
assert ny == cpu_data.shape[0]
|
assert ny == cpu_data.shape[0]
|
||||||
@ -610,6 +616,9 @@ class CudaArray3D:
|
|||||||
#Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
#Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
||||||
self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype)
|
self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype)
|
||||||
|
|
||||||
|
#For returning to download
|
||||||
|
self.memorypool = PageLockedMemoryPool()
|
||||||
|
|
||||||
#If we don't have any data, just allocate and return
|
#If we don't have any data, just allocate and return
|
||||||
if cpu_data is None:
|
if cpu_data is None:
|
||||||
return
|
return
|
||||||
@ -662,7 +671,8 @@ class CudaArray3D:
|
|||||||
#self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
|
#self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
|
||||||
#Allocate host memory
|
#Allocate host memory
|
||||||
#cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
|
#cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
|
||||||
cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32)
|
#cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32)
|
||||||
|
cpu_data = self.memorypool.allocate((self.nz, self.ny, self.nx), dtype=np.float32)
|
||||||
|
|
||||||
#Create copy object from device to host
|
#Create copy object from device to host
|
||||||
copy = cuda.Memcpy2D()
|
copy = cuda.Memcpy2D()
|
||||||
|
@ -47,6 +47,10 @@ class MagicCudaContext(Magics):
|
|||||||
|
|
||||||
self.logger.info("Registering %s in user workspace", args.name)
|
self.logger.info("Registering %s in user workspace", args.name)
|
||||||
|
|
||||||
|
context_flags = None
|
||||||
|
if (args.blocking):
|
||||||
|
context_flags = cuda.ctx_flags.SCHED_BLOCKING_SYNC
|
||||||
|
|
||||||
if args.name in self.shell.user_ns.keys():
|
if args.name in self.shell.user_ns.keys():
|
||||||
self.logger.debug("Context already registered! Ignoring")
|
self.logger.debug("Context already registered! Ignoring")
|
||||||
return
|
return
|
||||||
@ -54,7 +58,7 @@ class MagicCudaContext(Magics):
|
|||||||
self.logger.debug("Creating context")
|
self.logger.debug("Creating context")
|
||||||
use_cache = False if args.no_cache else True
|
use_cache = False if args.no_cache else True
|
||||||
use_autotuning = False if args.no_autotuning else True
|
use_autotuning = False if args.no_autotuning else True
|
||||||
self.shell.user_ns[args.name] = CudaContext.CudaContext(blocking=args.blocking, use_cache=use_cache, autotuning=use_autotuning)
|
self.shell.user_ns[args.name] = CudaContext.CudaContext(context_flags=context_flags, use_cache=use_cache, autotuning=use_autotuning)
|
||||||
|
|
||||||
# this function will be called on exceptions in any cell
|
# this function will be called on exceptions in any cell
|
||||||
def custom_exc(shell, etype, evalue, tb, tb_offset=None):
|
def custom_exc(shell, etype, evalue, tb, tb_offset=None):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user