diff --git a/GPUSimulators/common/arrays/array2d.py b/GPUSimulators/common/arrays/array2d.py index cd4c6c3..99a964e 100644 --- a/GPUSimulators/common/arrays/array2d.py +++ b/GPUSimulators/common/arrays/array2d.py @@ -46,6 +46,12 @@ class BaseArray2D(object): """ raise NotImplementedError("This function needs to be implemented in a subclass.") + def get_pitch(self) -> int: + """ + Gets the number of bytes it takes to move to the next row. + """ + raise NotImplementedError("This function needs to be implemented in a subclass.") + def check(self, x, y, nx, ny, cpu_data): if nx != cpu_data.shape[1]: raise ValueError diff --git a/GPUSimulators/common/arrays/cuda/array2d.py b/GPUSimulators/common/arrays/cuda/array2d.py index 66c6e76..4b98402 100644 --- a/GPUSimulators/common/arrays/cuda/array2d.py +++ b/GPUSimulators/common/arrays/cuda/array2d.py @@ -109,5 +109,8 @@ class CudaArray2D(BaseArray2D): copy(stream) - def get_strides(self) -> tuple[int, ...]: + def get_strides(self) -> tuple[int, int]: return self.data.strides[0] + + def get_pitch(self) -> int: + return self.data.strides[0][0] diff --git a/GPUSimulators/common/arrays/hip/arkawa2d.py b/GPUSimulators/common/arrays/hip/arkawa2d.py index a01780c..0a89ab9 100644 --- a/GPUSimulators/common/arrays/hip/arkawa2d.py +++ b/GPUSimulators/common/arrays/hip/arkawa2d.py @@ -1,3 +1,5 @@ +import ctypes + import numpy as np from hip import hip, hipblas @@ -13,18 +15,34 @@ def _sum_array(array: HIPArray2D): array: A HIPArray2D to compute the sum of. """ result_h = np.zeros(1, dtype=array.dtype) - num_bytes = result_h.size * result_h.itemsize + num_bytes = result_h.strides[0] result_d = hip_check(hip.hipMalloc(num_bytes)) # Sum the ``data_h`` array using hipblas handle = hip_check(hipblas.hipblasCreate()) - hip_check(hipblas.hipblasSasum(handle, array.num_bytes, array.data, 1, result_d)) - hip_check(hipblas.hipblasDestroy(handle)) + + # Using pitched memory, so we need to sum row by row + total_sum_d = hip_check(hip.hipMalloc(num_bytes)) + hip_check(hip.hipMemset(total_sum_d, 0, num_bytes)) + + width, height = array.shape + + for y in range(height): + row_ptr = int(array.data) + y * array.pitch_d + + hip_check(hipblas.hipblasSasum(handle, width, row_ptr, 1, result_d)) + + hip_check(hipblas.hipblasSaxpy(handle, 1, ctypes.c_float(1.0), result_d, 1, total_sum_d, 1)) + + hip_check(hip.hipMemcpy(result_h, total_sum_d, num_bytes, hip.hipMemcpyKind.hipMemcpyDeviceToHost)) # Copy over the result from the device - hip_check(hip.hipMemcpy(result_h, result_d, num_bytes, hip.hipMemcpyKind.hipMemcpyDeviceToHost)) + hip_check(hip.hipMemcpy(result_h, total_sum_d, num_bytes, hip.hipMemcpyKind.hipMemcpyDeviceToHost)) + # Cleanup + hip_check(hipblas.hipblasDestroy(handle)) hip_check(hip.hipFree(result_d)) + hip_check(hip.hipFree(total_sum_d)) return result_h diff --git a/GPUSimulators/common/arrays/hip/array2d.py b/GPUSimulators/common/arrays/hip/array2d.py index 0ee4188..444a618 100644 --- a/GPUSimulators/common/arrays/hip/array2d.py +++ b/GPUSimulators/common/arrays/hip/array2d.py @@ -1,3 +1,5 @@ +from enum import Enum + import numpy as np from hip import hip @@ -5,12 +7,18 @@ from ...hip_check import hip_check from ..array2d import BaseArray2D +class TransferType(Enum): + HOST_TO_DEVICE = 0 + DEVICE_TO_HOST = 1 + + class HIPArray2D(BaseArray2D): """ Class that holds 2D HIP data """ - def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype: np.dtype = np.float32): + def __init__(self, stream: hip.ihipStream_t, nx: int, ny: int, x_halo: int, y_halo: int, + cpu_data: np.ndarray = None, dtype: np.dtype = np.float32()): """ Uploads initial data to the HIP device """ @@ -18,30 +26,60 @@ class HIPArray2D(BaseArray2D): super().__init__(nx, ny, x_halo, y_halo, cpu_data) # self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny) self.dtype = dtype - self.data_h = np.zeros(self.shape, self.dtype) - self.num_bytes = self.data_h.size * self.data_h.itemsize - self.data = hip_check(hip.hipMalloc(self.num_bytes)).configure( - typestr=np.finfo(self.dtype).dtype.name, shape=self.shape - ) + self.data_h = np.zeros(self.shape, self.dtype) + + shape_x = self.shape[0] + shape_y = self.shape[1] + + self.width = shape_x * self.dtype.itemsize + self.height = shape_y + + self.num_bytes = self.width * self.height + + self.data, self.pitch_d = hip_check(hip.hipMallocPitch(self.width, self.height)) + + # TODO fix hipMallocPitch and remove this + # self.pitch_d = self.width + # self.data = hip_check(hip.hipMalloc(self.width * self.height)) + + # Initialise the memory with an array of zeros. + init_h = np.zeros(self.shape, self.dtype) + self.pitch_h = shape_x * init_h.itemsize + hip_check(hip.hipMemcpy2DAsync(self.data, self.pitch_d, + init_h, self.pitch_h, + self.width, self.height, + hip.hipMemcpyKind.hipMemcpyHostToDevice, stream)) # If there is no data to append, just leave this array as allocated if cpu_data is None: return + host_x = cpu_data.shape[1] + host_y = cpu_data.shape[0] + # Create a copy object from host to device - x = (self.shape[0] - cpu_data.shape[1]) // 2 - y = (self.shape[1] - cpu_data.shape[0]) // 2 - self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]]) + x = (shape_x - host_y) // 2 + y = (shape_y - host_x) // 2 + self.upload(stream, cpu_data, extent=(x, y, host_x, host_y)) # self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny) def __del__(self, *args): # self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny) hip_check(hip.hipFree(self.data)) - def download(self, stream, cpu_data=None, asynch=False, extent=None): + def download(self, stream: hip.ihipStream_t, cpu_data: np.ndarray = None, asynch=False, + extent: tuple[int, int, int, int] = None) -> np.ndarray: """ Enables downloading data from GPU to Python + Args: + stream: The GPU stream to add the memory copy to. + cpu_data: The array to store the data copied from GPU memory. + asynch: Synchronize the stream before returning `cpu_data`. + extent: Parameters for where in the GPU memory to copy from. + Returns: + `cpu_data` with the data from the GPU memory. + Note the data in `cpu_data` may be uninitialized if `asynch` was not set to `True`. """ if extent is None: @@ -57,16 +95,34 @@ class HIPArray2D(BaseArray2D): # Allocate host memory cpu_data = np.zeros((ny, nx), dtype=self.dtype) - copy_args = hip.hip_Memcpy2D(**self.__get_copy_info(x, y, nx, ny, cpu_data, True)) + self.check(x, y, nx, ny, cpu_data) - hip_check(hip.hipMemcpyParam2DAsync(copy_args, stream)) + pitch_h, width, height = self.__get_array_vars(cpu_data, nx, ny) + + # Parameters to copy to GPU memory + copy = hip.hip_Memcpy2D( + srcDevice=self.data, + srcPitch=self.pitch_d, + srcXInBytes=x * self.dtype.itemsize, + srcY=y, + srcMemoryType=hip.hipMemoryType.hipMemoryTypeDevice, + + dstHost=cpu_data, + dstPitch=pitch_h, + dstMemoryType=hip.hipMemoryType.hipMemoryTypeHost, + + WidthInBytes=width, + Height=height + ) + + hip_check(hip.hipMemcpyParam2DAsync(copy, stream)) if not asynch: hip_check(hip.hipStreamSynchronize(stream)) return cpu_data - def upload(self, stream, cpu_data, extent=None): + def upload(self, stream: hip.ihipStream_t, cpu_data: np.ndarray, extent: tuple[int, int, int, int] = None): if extent is None: x = self.x_halo y = self.y_halo @@ -75,58 +131,52 @@ class HIPArray2D(BaseArray2D): else: x, y, nx, ny = extent - copy_param = hip.hip_Memcpy2D(**self.__get_copy_info(x, y, nx, ny, cpu_data)) + pitch_h, width, height = self.__get_array_vars(cpu_data, nx, ny) - hip_check(hip.hipMemcpyParam2DAsync(copy_param, stream)) + self.check(x, y, nx, ny, cpu_data) - def get_strides(self) -> tuple[int, ...]: - strides = [] - for i in range(len(self.data_h.shape)): - strides.append(self.data_h.shape[i] * np.float32().itemsize) + # Parameters to copy to GPU memory + copy = hip.hip_Memcpy2D( + srcHost = cpu_data, + srcPitch = pitch_h, + srcMemoryType = hip.hipMemoryType.hipMemoryTypeHost, - return tuple(strides) + dstDevice = self.data, + dstPitch = self.pitch_d, + dstXInBytes = x * self.dtype.itemsize, + dstY = y, + dstMemoryType = hip.hipMemoryType.hipMemoryTypeDevice, - def __get_copy_info(self, x, y, nx, ny, host, to_host=False): - self.check(x, y, nx, ny, host) + WidthInBytes = width, + Height = height + ) - # Arguments for the host data - src_args = [ - 'Host', - 0, - 0, - hip.hipMemoryType.hipMemoryTypeHost, - host, - host.strides[0] + hip_check(hip.hipMemcpyParam2DAsync(copy, stream)) - ] - # Arguments for the device - dst_args = [ - 'Device', - int(x) * np.float32().itemsize, - int(y), - hip.hipMemoryType.hipMemoryTypeDevice, - self.data, - self.get_strides()[0], - ] + def get_strides(self) -> tuple[int, int]: + return self.pitch_d, self.dtype.itemsize - if to_host: - src_args, dst_args = dst_args, src_args + def get_pitch(self) -> int: + return self.pitch_d - args = { - 'srcXInBytes': src_args[1], - 'srcY': src_args[2], - 'srcMemoryType': src_args[3], - f'src{src_args[0]}': src_args[4], - 'srcPitch': src_args[5], + def __get_array_vars(self, cpu_data: np.ndarray, nx: int = None, ny: int = None) -> tuple[int, int, int]: + """ + Gets the variables used for defining the array. + Args: + nx: Height of the array, in elements. + ny: Width of the array, in elements. + """ - 'dstXInBytes': dst_args[1], - 'dstY': dst_args[2], - 'dstMemoryType': dst_args[3], - f'dst{dst_args[0]}': dst_args[4], - 'dstPitch': dst_args[5], + if nx is None and ny is None: + width = self.nx * cpu_data.itemsize + height = self.ny + elif nx is not None and ny is not None: + width = int(nx) * cpu_data.itemsize + height = int(ny) + else: + raise ValueError("Can only get variables if either all variables are parsed to the function, or none. " + + "Cannot only have 1 variable parsed into the function.") - 'WidthInBytes': int(nx) * np.float32().itemsize, - 'Height': int(ny) - } + pitch_h = cpu_data.strides[0] - return args + return pitch_h, width, height diff --git a/GPUSimulators/model/ee2d_kp07_dimsplit.py b/GPUSimulators/model/ee2d_kp07_dimsplit.py index 632e3e1..e57a462 100644 --- a/GPUSimulators/model/ee2d_kp07_dimsplit.py +++ b/GPUSimulators/model/ee2d_kp07_dimsplit.py @@ -122,14 +122,14 @@ class EE2DKP07Dimsplit(BaseSimulator): self.theta, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u0[3].data, self.u0[3].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], - self.u1[3].data, self.u1[3].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u0[3].data, self.u0[3].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), + self.u1[3].data, self.u1[3].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, self.ny]) @@ -153,14 +153,14 @@ class EE2DKP07Dimsplit(BaseSimulator): self.theta, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u0[3].data, self.u0[3].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], - self.u1[3].data, self.u1[3].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u0[3].data, self.u0[3].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), + self.u1[3].data, self.u1[3].get_pitch(), self.handler.cfl_data, 0, self.ny - int(self.u0[0].y_halo), self.nx, self.ny]) @@ -176,14 +176,14 @@ class EE2DKP07Dimsplit(BaseSimulator): self.theta, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u0[3].data, self.u0[3].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], - self.u1[3].data, self.u1[3].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u0[3].data, self.u0[3].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), + self.u1[3].data, self.u1[3].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, int(self.u0[0].y_halo)]) @@ -201,14 +201,14 @@ class EE2DKP07Dimsplit(BaseSimulator): self.theta, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u0[3].data, self.u0[3].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], - self.u1[3].data, self.u1[3].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u0[3].data, self.u0[3].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), + self.u1[3].data, self.u1[3].get_pitch(), self.handler.cfl_data, 0, 0, int(self.u0[0].x_halo), self.ny]) @@ -224,14 +224,14 @@ class EE2DKP07Dimsplit(BaseSimulator): self.theta, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u0[3].data, self.u0[3].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], - self.u1[3].data, self.u1[3].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u0[3].data, self.u0[3].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), + self.u1[3].data, self.u1[3].get_pitch(), self.handler.cfl_data, self.nx - int(self.u0[0].x_halo), 0, self.nx, self.ny]) @@ -249,14 +249,14 @@ class EE2DKP07Dimsplit(BaseSimulator): self.theta, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u0[3].data, self.u0[3].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], - self.u1[3].data, self.u1[3].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u0[3].data, self.u0[3].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), + self.u1[3].data, self.u1[3].get_pitch(), self.handler.cfl_data, int(self.u0[0].x_halo), int(self.u0[0].y_halo), self.nx - int(self.u0[0].x_halo), self.ny - int(self.u0[0].y_halo)]) diff --git a/GPUSimulators/model/force.py b/GPUSimulators/model/force.py index 8dc48a8..fa759a1 100644 --- a/GPUSimulators/model/force.py +++ b/GPUSimulators/model/force.py @@ -112,12 +112,12 @@ class Force(BaseSimulator): self.dx, self.dy, dt, self.g, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, self.ny]) diff --git a/GPUSimulators/model/hll.py b/GPUSimulators/model/hll.py index 347a604..5dda7b6 100644 --- a/GPUSimulators/model/hll.py +++ b/GPUSimulators/model/hll.py @@ -112,12 +112,12 @@ class HLL(BaseSimulator): self.dx, self.dy, dt, self.g, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, self.ny]) diff --git a/GPUSimulators/model/hll2.py b/GPUSimulators/model/hll2.py index 1ba38ec..3fd0c6c 100644 --- a/GPUSimulators/model/hll2.py +++ b/GPUSimulators/model/hll2.py @@ -118,12 +118,12 @@ class HLL2(BaseSimulator): self.theta, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, self.ny]) diff --git a/GPUSimulators/model/kp07.py b/GPUSimulators/model/kp07.py index 638d405..008fadf 100644 --- a/GPUSimulators/model/kp07.py +++ b/GPUSimulators/model/kp07.py @@ -125,12 +125,12 @@ class KP07(BaseSimulator): self.theta, conversion.step_order_to_coded_int(step=substep, order=self.order), self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, self.ny]) diff --git a/GPUSimulators/model/kp07_dimsplit.py b/GPUSimulators/model/kp07_dimsplit.py index a0dd054..fb3dcca 100644 --- a/GPUSimulators/model/kp07_dimsplit.py +++ b/GPUSimulators/model/kp07_dimsplit.py @@ -125,12 +125,12 @@ class KP07Dimsplit(BaseSimulator): self.theta, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, self.ny]) diff --git a/GPUSimulators/model/lxf.py b/GPUSimulators/model/lxf.py index 99b46f8..5b0129f 100644 --- a/GPUSimulators/model/lxf.py +++ b/GPUSimulators/model/lxf.py @@ -117,12 +117,12 @@ class LxF(BaseSimulator): self.dx, self.dy, dt, self.g, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, self.ny]) diff --git a/GPUSimulators/model/waf.py b/GPUSimulators/model/waf.py index 85e8ce6..57f90ff 100644 --- a/GPUSimulators/model/waf.py +++ b/GPUSimulators/model/waf.py @@ -116,12 +116,12 @@ class WAF(BaseSimulator): self.g, substep, self.boundary_conditions, - self.u0[0].data, self.u0[0].get_strides()[0], - self.u0[1].data, self.u0[1].get_strides()[0], - self.u0[2].data, self.u0[2].get_strides()[0], - self.u1[0].data, self.u1[0].get_strides()[0], - self.u1[1].data, self.u1[1].get_strides()[0], - self.u1[2].data, self.u1[2].get_strides()[0], + self.u0[0].data, self.u0[0].get_pitch(), + self.u0[1].data, self.u0[1].get_pitch(), + self.u0[2].data, self.u0[2].get_pitch(), + self.u1[0].data, self.u1[0].get_pitch(), + self.u1[1].data, self.u1[1].get_pitch(), + self.u1[2].data, self.u1[2].get_pitch(), self.handler.cfl_data, 0, 0, self.nx, self.ny])