FiniteVolumeGPU/GPUSimulators/common/arrays/hip/array2d.py

from enum import Enum

import numpy as np
from hip import hip

from ...hip_check import hip_check
from ..array2d import BaseArray2D


class TransferType(Enum):
    """
    Direction of a memory transfer, named by its source and destination (host or device).
    """
    # NOTE(review): not referenced by any code in the visible part of this file; confirm it is used elsewhere.

    HOST_TO_DEVICE = 0
    DEVICE_TO_HOST = 1


class HIPArray2D(BaseArray2D):
    """
    Class that holds 2D HIP data.

    The device buffer is allocated with `hipMallocPitch`, so consecutive rows are `pitch_d` bytes apart on the
    device. Extents used by `download` and `upload` are `(x, y, nx, ny)` tuples: `x`/`y` are the column/row offset
    (in elements/rows) into the device buffer and `nx`/`ny` are the number of columns/rows to transfer.
    """

    def __init__(self, stream: hip.ihipStream_t, nx: int, ny: int, x_halo: int, y_halo: int,
                 cpu_data: np.ndarray = None, dtype: np.dtype = np.float32()):
        """
        Allocates the device buffer, zeroes it, and optionally uploads initial data to the HIP device.

        Args:
            stream: The GPU stream the (asynchronous) memory copies are added to.
            nx: Number of cells along x, forwarded to the base class.
            ny: Number of cells along y, forwarded to the base class.
            x_halo: Halo size along x, forwarded to the base class.
            y_halo: Halo size along y, forwarded to the base class.
            cpu_data: Optional initial data, indexed as `[row, column]`. It is centred in the device buffer.
            dtype: Element type of the array. Anything accepted by `np.dtype` may be given.
        """

        super().__init__(nx, ny, x_halo, y_halo, cpu_data)

        # Normalise to a real `np.dtype` so that `itemsize` is an int also when a scalar type
        # (e.g. `np.float32`) is passed instead of an instance.
        self.dtype = np.dtype(dtype)

        self.data_h = np.zeros(self.shape, self.dtype)

        shape_x = self.shape[0]
        shape_y = self.shape[1]

        # Width of one row in bytes and number of rows of the device allocation.
        self.width = shape_x * self.dtype.itemsize
        self.height = shape_y

        self.num_bytes = self.width * self.height

        self.data, self.pitch_d = hip_check(hip.hipMallocPitch(self.width, self.height))

        # Initialise the device memory with zeros. The copy is asynchronous, so the source is `self.data_h`
        # (which lives as long as this object) rather than a temporary that could be released too early.
        self.pitch_h = self.width
        hip_check(hip.hipMemcpy2DAsync(self.data, self.pitch_d,
                                       self.data_h, self.pitch_h,
                                       self.width, self.height,
                                       hip.hipMemcpyKind.hipMemcpyHostToDevice, stream))

        # If there is no data to upload, just leave this array as allocated
        if cpu_data is None:
            return

        # `cpu_data` is indexed [row, column]: shape[1] is its width and shape[0] its height.
        host_x = cpu_data.shape[1]
        host_y = cpu_data.shape[0]

        # Centre the host data in the device buffer, which is `shape_x` elements wide and `shape_y` rows high.
        # Columns must be compared against columns and rows against rows.
        x = (shape_x - host_x) // 2
        y = (shape_y - host_y) // 2
        self.upload(stream, cpu_data, extent=(x, y, host_x, host_y))

    def __del__(self, *args):
        """
        Releases the device buffer, if one was allocated and has not been released already.
        """
        # `data` is missing if `__init__` failed before the allocation.
        data = getattr(self, "data", None)
        if data is None:
            return

        # Clear the attribute first so the buffer cannot be freed twice.
        self.data = None
        hip_check(hip.hipFree(data))

    def download(self, stream: hip.ihipStream_t, cpu_data: np.ndarray = None, asynch=False,
                 extent: tuple[int, int, int, int] = None) -> np.ndarray:
        """
        Enables downloading data from GPU to Python
        Args:
            stream: The GPU stream to add the memory copy to.
            cpu_data: The array to store the data copied from GPU memory.
                If `None`, a new `(ny, nx)` array of this array's dtype is allocated.
            asynch: If `True`, return without synchronizing the stream.
                If `False` (default), the stream is synchronized before returning `cpu_data`.
            extent: Parameters `(x, y, nx, ny)` for where in the GPU memory to copy from.
                Defaults to `(x_halo, y_halo, nx, ny)` of this array.
        Returns:
            `cpu_data` with the data from the GPU memory.
            Note the data in `cpu_data` may be uninitialized if `asynch` was set to `True`,
            until the stream has been synchronized.
        """

        x, y, nx, ny = self.__resolve_extent(extent)

        if cpu_data is None:
            # Allocate host memory
            cpu_data = np.zeros((ny, nx), dtype=self.dtype)

        # `check` is not defined in this class; it is expected to validate the extent against `cpu_data`.
        self.check(x, y, nx, ny, cpu_data)

        pitch_h, width, height = self.__get_array_vars(cpu_data, nx, ny)

        # Parameters to copy from GPU memory
        copy = hip.hip_Memcpy2D(
            srcDevice=self.data,
            srcPitch=self.pitch_d,
            srcXInBytes=x * self.dtype.itemsize,
            srcY=y,
            srcMemoryType=hip.hipMemoryType.hipMemoryTypeDevice,

            dstHost=cpu_data,
            dstPitch=pitch_h,
            dstMemoryType=hip.hipMemoryType.hipMemoryTypeHost,

            WidthInBytes=width,
            Height=height
        )

        hip_check(hip.hipMemcpyParam2DAsync(copy, stream))

        if not asynch:
            hip_check(hip.hipStreamSynchronize(stream))

        return cpu_data

    def upload(self, stream: hip.ihipStream_t, cpu_data: np.ndarray, extent: tuple[int, int, int, int] = None):
        """
        Enables uploading data from Python to GPU
        Args:
            stream: The GPU stream to add the memory copy to. The stream is not synchronized by this method.
            cpu_data: The array holding the data to copy to GPU memory.
            extent: Parameters `(x, y, nx, ny)` for where in the GPU memory to copy to.
                Defaults to `(x_halo, y_halo, nx, ny)` of this array.
        """

        x, y, nx, ny = self.__resolve_extent(extent)

        pitch_h, width, height = self.__get_array_vars(cpu_data, nx, ny)

        # `check` is not defined in this class; it is expected to validate the extent against `cpu_data`.
        self.check(x, y, nx, ny, cpu_data)

        # Parameters to copy to GPU memory
        copy = hip.hip_Memcpy2D(
            srcHost=cpu_data,
            srcPitch=pitch_h,
            srcMemoryType=hip.hipMemoryType.hipMemoryTypeHost,

            dstDevice=self.data,
            dstPitch=self.pitch_d,
            dstXInBytes=x * self.dtype.itemsize,
            dstY=y,
            dstMemoryType=hip.hipMemoryType.hipMemoryTypeDevice,

            WidthInBytes=width,
            Height=height
        )

        hip_check(hip.hipMemcpyParam2DAsync(copy, stream))

    def get_strides(self) -> tuple[int, int]:
        """
        Returns the device pitch (bytes between consecutive rows) and the size of one element in bytes.
        """
        return self.pitch_d, self.dtype.itemsize

    def get_pitch(self) -> int:
        """
        Returns the device pitch as reported by `hipMallocPitch`.
        """
        return self.pitch_d

    def __resolve_extent(self, extent: tuple[int, int, int, int] = None) -> tuple[int, int, int, int]:
        """
        Returns `extent` unpacked as `(x, y, nx, ny)`, or `(x_halo, y_halo, nx, ny)` of this array if it is `None`.
        """
        if extent is None:
            return self.x_halo, self.y_halo, self.nx, self.ny

        x, y, nx, ny = extent
        return x, y, nx, ny

    def __get_array_vars(self, cpu_data: np.ndarray, nx: int = None, ny: int = None) -> tuple[int, int, int]:
        """
        Gets the variables used for defining the array.
        Args:
            cpu_data: The host array taking part in the copy; provides the item size and the row stride.
            nx: Width of the array, in elements. Defaults to `self.nx` when both `nx` and `ny` are omitted.
            ny: Height of the array, in rows. Defaults to `self.ny` when both `nx` and `ny` are omitted.
        Returns:
            `(pitch_h, width, height)`: the row stride of `cpu_data` in bytes, the width of the region to copy
            in bytes, and its height in rows.
        Raises:
            ValueError: If only one of `nx` and `ny` is given.
        """

        if nx is None and ny is None:
            width = self.nx * cpu_data.itemsize
            height = self.ny
        elif nx is not None and ny is not None:
            width = int(nx) * cpu_data.itemsize
            height = int(ny)
        else:
            raise ValueError("Can only get variables if either all variables are parsed to the function, or none. " +
                             "Cannot only have 1 variable parsed into the function.")

        # Bytes between consecutive rows of the host array.
        pitch_h = cpu_data.strides[0]

        return pitch_h, width, height