FiniteVolumeGPU/GPUSimulators/gpu/hip_handler.py

import ctypes

import numpy as np
from hip import hip, hipblas

from GPUSimulators.common import hip_check
from GPUSimulators.gpu.handler import BaseGPUHandler
from GPUSimulators.gpu import KernelContext


class HIPHandler(BaseGPUHandler):
    def __init__(self, context: KernelContext, module, function, arguments,
                 grid_size):
        super().__init__(context, module, function, arguments, grid_size)

        self.kernel = hip_check(hip.hipModuleGetFunction(module, bytes(function, "utf-8")))
        self.context = context

        self.dtype = np.float32
        self.cfl_data_h = np.empty(grid_size, dtype=self.dtype)

        self.num_bytes = self.cfl_data_h.size * self.cfl_data_h.itemsize
        self.cfl_data = hip_check(hip.hipMalloc(self.num_bytes)).configure(
            typestr=np.finfo(self.dtype).dtype.name, shape=grid_size
        )

    def __del__(self):
        hip_check(hip.hipFree(self.cfl_data))

    def prepared_call(self, grid_size, block_size, stream, args):
        if len(grid_size) < 3:
            grid_size = (*grid_size, 1)

        for i in range(len(args)):
            val = args[i]
            if isinstance(val, int) or isinstance(val, np.int32):
                args[i] = ctypes.c_int(val)
            elif isinstance(val, float) or isinstance(val, np.float32):
                args[i] = ctypes.c_float(val)

        args = tuple(args)

        hip_check(hip.hipModuleLaunchKernel(
            self.kernel,
            *grid_size,
            *block_size,
            0,
            stream,
            None,
            args
        ))


    def array_fill(self, data, stream):
        self.cfl_data_h.fill(data)

        hip_check(
            hip.hipMemcpyAsync(self.cfl_data, self.cfl_data_h, self.num_bytes, hip.hipMemcpyKind.hipMemcpyHostToDevice,
                               stream))

    def array_min(self, stream):
        handle = hip_check(hipblas.hipblasCreate())

        value = np.empty(1, self.dtype)
        hip_check(hipblas.hipblasIsamin(handle, self.cfl_data.size, self.cfl_data, 1, value))
        hip_check(hipblas.hipblasDestroy(handle))

        hip_check(hip.hipMemcpy(value, self.cfl_data, self.cfl_data_h.itemsize, hip.hipMemcpyKind.hipMemcpyDeviceToHost))

        return value[0]