feat(gpu): add compiler to HIPContext

This commit is contained in:
Anthony Berg 2025-06-30 14:11:57 +02:00
parent ecfdaaa39e
commit 8f90ec6291
11 changed files with 115 additions and 62 deletions

View File

@ -1,7 +1,6 @@
import logging
import os
import io
import re
import logging
from hashlib import md5
@ -21,7 +20,7 @@ class Context(object):
self.logger = logging.getLogger(__name__)
self.modules = {}
self.module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)) + language)
self.module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), f"{language}")
self.autotuner = None
@ -101,6 +100,7 @@ class Context(object):
return kernel_hasher.hexdigest()
def get_module(self, kernel_filename: str,
function: str,
include_dirs: dict = None,
defines: list[str] = None,
compile_args: dict = None,

View File

@ -204,7 +204,7 @@ class CudaContext(object):
if defines is None:
defines = {}
if include_dirs is None:
include_dirs = [os.path.join(self.module_path) + "include"]
# Default to the "include" directory inside the module path. The path
# component must be passed to os.path.join itself; with the parenthesis closed
# early the list held two entries (the module dir and the bare string
# "include") instead of one joined path.
include_dirs = [os.path.join(self.module_path, "include")]
if compile_args is None:
compile_args = {'no_extern_c': True}
if jit_compile_args is None:

View File

@ -3,9 +3,10 @@ import io
import os.path
import hip as hip_main
from hip import hip
from hip import hip, hiprtc
from GPUSimulators.common import Timer
from GPUSimulators.common import Timer, hip_check
from GPUSimulators.common.utils import get_includes
from GPUSimulators.gpu.context import Context
@ -19,6 +20,7 @@ class HIPContext(Context):
Creates a new HIP context.
"""
super().__init__("hip", device, context_flags, use_cache, autotuning)
self.prog = {}
# Log information about HIP version
self.logger.info(f"HIP Python version {hip_main.HIP_VERSION_NAME}")
@ -26,11 +28,11 @@ class HIPContext(Context):
# Device information
props = hip.hipDeviceProp_t()
self.__hip_check(hip.hipGetDeviceProperties(props, device))
device_count = self.__hip_check(hip.hipGetDeviceCount())
arch = props.gcnArchName
hip_check(hip.hipGetDeviceProperties(props, device))
device_count = hip_check(hip.hipGetDeviceCount())
self.arch = props.gcnArchName
self.logger.info(
f"Using device {device}/{device_count} '{props.name.decode('ascii')} ({arch.decode('ascii')})'"
f"Using device {device}/{device_count} '{props.name.decode()} ({self.arch.decode()})'"
+ f" ({props.pciBusID})"
)
self.logger.debug(f" => total available memory: {int(props.totalGlobalMem / pow(1024, 2))} MiB")
@ -38,34 +40,29 @@ class HIPContext(Context):
if autotuning:
self.logger.info(
"Autotuning enabled. It may take several minutes to run the code the first time: have patience")
raise (NotImplementedError("Autotuner is not yet implemented for HIP."))
raise NotImplementedError("Autotuner is not yet implemented for HIP.")
# TODO Implement Autotuner for HIP
# self.autotuner = Autotuner.Autotuner()
def __hip_check(self, call_request):
"""
Function that checks if the HIP function executed successfully.
"""
def __del__(self):
for module in self.modules.values():
hip_check(hip.hipModuleUnload(module))
err = call_request[0]
result = call_request[1:]
if len(result) == 1:
result = result[0]
if isinstance(err, hip.hipError_t) and err != hip.hipError_t.hipSuccess:
self.logger.error(f"HIP Error: {str(err)}")
raise RuntimeError(str(err))
return result
for prog in self.prog.values():
hip_check(hiprtc.hiprtcDestroyProgram(prog.createRef()))
def get_module(self, kernel_filename: str,
include_dirs: dict=None,
defines:dict[str: int]=None,
compile_args:dict=None,
jit_compile_args:dict=None):
function: str,
include_dirs: list[str] = None,
defines: dict[str: int] = None,
compile_args: dict[str: list] = None,
jit_compile_args: dict = None):
"""
Reads a ``.hip`` file and creates a HIP kernel from that.
Args:
kernel_filename: The file to use for the kernel.
function: The main function of the kernel.
include_dirs: List of directories for the ``#include``s referenced.
defines: Adds ``#define`` tags to the kernel, such as: ``#define key value``.
compile_args: Adds other compiler options (parameters); the list stored under the ``'hip'`` key is passed to ``hiprtc.hiprtcCompileProgram()``.
@ -78,12 +75,17 @@ class HIPContext(Context):
if defines is None:
defines = {}
if include_dirs is None:
include_dirs = []
include_dirs = [os.path.join(self.module_path, "include")]
if compile_args is None:
compile_args = {'no_extern_c': True}
compile_args = {'hip': []}
if jit_compile_args is None:
jit_compile_args = {}
# Only the HIP-specific options are relevant here. Fall back to an empty list
# when the caller's dict has no 'hip' entry, instead of failing on None.
hip_options = compile_args.get('hip') or []
# hiprtc expects the options as byte strings.
compile_args = [option.encode("utf-8") for option in hip_options]
# Always compile for the architecture stored on this context (self.arch).
compile_args.append(b"--offload-arch=" + self.arch)
def compile_message_handler(compile_success_bool, info_str, error_str):
self.logger.debug(f"Compilation success: {str(compile_success_bool)}")
if info_str:
@ -91,7 +93,7 @@ class HIPContext(Context):
if error_str:
self.logger.debug(f"Compilation error: {error_str}")
kernel_filename = os.path.normpath("hip/" + kernel_filename + ".hip")
kernel_filename = os.path.normpath(kernel_filename + ".hip")
kernel_path = os.path.abspath(os.path.join(self.module_path, kernel_filename))
# Create a hash of the kernel options
@ -107,22 +109,23 @@ class HIPContext(Context):
kernel_hash = root + "_" + source_hash + "_" + options_hash + ext
cached_kernel_filename = os.path.join(self.cache_path, kernel_hash)
# Checking if the kernel is already in the hashmap
# Checks if the module is already cached in the hash map
if kernel_hash in self.modules.keys():
self.logger.debug(f"Found kernel {kernel_filename} cached in hashmap ({kernel_hash})")
self.logger.debug(f"Found kernel {kernel_filename} cached in hashmap ({kernel_hash}).")
return self.modules[kernel_hash]
elif self.use_cache and os.path.isfile(cached_kernel_filename):
self.logger.debug(f"Found kernerl {kernel_filename} cached on disk ({kernel_hash})")
# Check if the cache is on the disk
self.logger.debug(f"Found kernel {kernel_filename} cached on disk ({kernel_hash}).")
with io.open(cached_kernel_filename, "rb") as file:
file_str = file.read()
# TODO add ``module`` to HIP
module = None
code = file.read()
module = hip_check(hip.hipModuleLoadData(code))
self.modules[kernel_hash] = module
return module
else:
self.logger.debug(f"Compiling {kernel_filename} ({kernel_hash})")
# As it was not found in the cache, compile it.
self.logger.debug(f"Compiling {kernel_filename} ({kernel_hash}) for {self.arch}.")
# Create kernel string
kernel_string = ""
@ -138,23 +141,40 @@ class HIPContext(Context):
file.write(kernel_string)
with Timer("compiler") as timer:
import warnings
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="The CUDA compiler succeeded, but said the following:\nkernel.cu", category=UserWarning)
# TODO compile the binary file
bin = None
prog = hip_check(
hiprtc.hiprtcCreateProgram(bytes(kernel_string, "utf-8"), bytes(function, "utf-8"),
0, [], []))
err, = hiprtc.hiprtcCompileProgram(prog, len(compile_args), compile_args)
if err != hiprtc.hiprtcResult.HIPRTC_SUCCESS:
log_size = hip_check(hiprtc.hiprtcGetProgramLogSize(prog))
log = bytearray(log_size)
hip_check(hiprtc.hiprtcGetProgramLog(prog, log))
raise RuntimeError(log.decode())
code_size = hip_check(hiprtc.hiprtcGetCodeSize(prog))
code = bytearray(code_size)
hip_check(hiprtc.hiprtcGetCode(prog, code))
module = hip_check(hip.hipModuleLoadData(code))
# TODO get binary from buffer
module = None
if self.use_cache:
with io.open(cached_kernel_filename, "wb") as file:
file.write(bin)
file.write(code)
self.modules[kernel_hash] = module
self.prog[kernel_hash] = prog
return module
def synchronize(self):
self.__hip_check(hip.hipDeviceSynchronize())
hip_check(hip.hipDeviceSynchronize())
# Manual smoke test: build a context and compile one kernel. Guarded so that
# importing this module does not create a GPU context or trigger compilation
# as a side effect; it only runs when the file is executed directly.
if __name__ == "__main__":
    test = HIPContext()
    test.get_module("SWE2D_HLL",
                    "HLLKernel",
                    defines={
                        'BLOCK_WIDTH': 8,
                        'BLOCK_HEIGHT': 8
                    },
                    jit_compile_args={})

View File

@ -42,7 +42,8 @@ class EE2DKP07Dimsplit(BaseSimulator):
theta=1.3,
cfl_scale=0.9,
boundary_conditions=BoundaryCondition(),
block_width=16, block_height=8):
block_width=16, block_height=8,
compile_opts: list[str] = []):
"""
Initialization routine
@ -75,13 +76,17 @@ class EE2DKP07Dimsplit(BaseSimulator):
# Get kernels
module = context.get_module("EE2D_KP07_dimsplit",
"KP07DimsplitKernel",
defines={
'BLOCK_WIDTH': self.block_size[0],
'BLOCK_HEIGHT': self.block_size[1]
},
compile_args={
'no_extern_c': True,
'options': ["--use_fast_math"],
'cuda': {
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
},
'hip': compile_opts,
},
jit_compile_args={})
self.kernel = module.get_function("KP07DimsplitKernel")

View File

@ -72,13 +72,17 @@ class Force(BaseSimulator):
# Get kernels
module = context.get_module("SWE2D_FORCE",
"FORCEKernel",
defines={
'BLOCK_WIDTH': self.block_size[0],
'BLOCK_HEIGHT': self.block_size[1]
},
compile_args={
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
'cuda': {
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
},
'hip': compile_opts,
},
jit_compile_args={})
self.kernel = module.get_function("FORCEKernel")

View File

@ -71,13 +71,17 @@ class HLL(BaseSimulator):
# Get kernels
module = context.get_module("SWE2D_HLL",
"HLLKernel",
defines={
'BLOCK_WIDTH': self.block_size[0],
'BLOCK_HEIGHT': self.block_size[1]
},
compile_args={
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
'cuda': {
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
},
'hip': compile_opts,
},
jit_compile_args={})
self.kernel = module.get_function("HLLKernel")

View File

@ -73,13 +73,17 @@ class HLL2(BaseSimulator):
# Get kernels
module = context.get_module("SWE2D_HLL2",
"HLL2Kernel",
defines={
'BLOCK_WIDTH': self.block_size[0],
'BLOCK_HEIGHT': self.block_size[1]
},
compile_args={
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
'cuda': {
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
},
'hip': compile_opts,
},
jit_compile_args={})
self.kernel = module.get_function("HLL2Kernel")

View File

@ -80,13 +80,17 @@ class KP07(BaseSimulator):
# Get kernels
module = context.get_module("SWE2D_KP07",
"KP07Kernel",
defines={
'BLOCK_WIDTH': self.block_size[0],
'BLOCK_HEIGHT': self.block_size[1]
},
compile_args={
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
'cuda': {
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
},
'hip': compile_opts,
},
jit_compile_args={})
self.kernel = module.get_function("KP07Kernel")

View File

@ -80,13 +80,17 @@ class KP07Dimsplit(BaseSimulator):
# Get kernels
module = context.get_module("SWE2D_KP07_dimsplit",
"KP07DimsplitKernel",
defines={
'BLOCK_WIDTH': self.block_size[0],
'BLOCK_HEIGHT': self.block_size[1]
},
compile_args={
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
'cuda': {
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
},
'hip': compile_opts,
},
jit_compile_args={})
self.kernel = module.get_function("KP07DimsplitKernel")

View File

@ -72,13 +72,17 @@ class LxF(BaseSimulator):
# Get kernels
module = context.get_module("SWE2D_LxF",
"LxFKernel",
defines={
'BLOCK_WIDTH': self.block_size[0],
'BLOCK_HEIGHT': self.block_size[1]
},
compile_args={
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
'cuda': {
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
},
'hip': compile_opts,
},
jit_compile_args={})
self.kernel = module.get_function("LxFKernel")

View File

@ -72,13 +72,17 @@ class WAF(BaseSimulator):
# Get kernels
module = context.get_module("SWE2D_WAF",
"WAFKernel",
defines={
'BLOCK_WIDTH': self.block_size[0],
'BLOCK_HEIGHT': self.block_size[1]
},
compile_args={
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
'cuda': {
'no_extern_c': True,
'options': ["--use_fast_math"] + compile_opts,
},
'hip': compile_opts,
},
jit_compile_args={})
self.kernel = module.get_function("WAFKernel")