mirror of
				https://github.com/smyalygames/FiniteVolumeGPU.git
				synced 2025-10-31 20:27:40 +01:00 
			
		
		
		
	feat(gpu): add compiler to HIPContext
This commit is contained in:
		
							parent
							
								
									ecfdaaa39e
								
							
						
					
					
						commit
						8f90ec6291
					
				| @ -1,7 +1,6 @@ | ||||
| import logging | ||||
| import os | ||||
| import io | ||||
| import re | ||||
| import logging | ||||
| from hashlib import md5 | ||||
| 
 | ||||
| @ -21,7 +20,7 @@ class Context(object): | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|         self.modules = {} | ||||
| 
 | ||||
|         self.module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)) + language) | ||||
|         self.module_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), f"{language}") | ||||
| 
 | ||||
|         self.autotuner = None | ||||
| 
 | ||||
| @ -101,6 +100,7 @@ class Context(object): | ||||
|         return kernel_hasher.hexdigest() | ||||
| 
 | ||||
|     def get_module(self, kernel_filename: str, | ||||
|                    function: str, | ||||
|                    include_dirs: dict = None, | ||||
|                    defines: list[str] = None, | ||||
|                    compile_args: dict = None, | ||||
|  | ||||
| @ -204,7 +204,7 @@ class CudaContext(object): | ||||
|         if defines is None: | ||||
|             defines = {} | ||||
|         if include_dirs is None: | ||||
|             include_dirs = [os.path.join(self.module_path) + "include"] | ||||
|             include_dirs = [os.path.join(self.module_path), "include"] | ||||
|         if compile_args is None: | ||||
|             compile_args = {'no_extern_c': True} | ||||
|         if jit_compile_args is None: | ||||
|  | ||||
| @ -3,9 +3,10 @@ import io | ||||
| import os.path | ||||
| 
 | ||||
| import hip as hip_main | ||||
| from hip import hip | ||||
| from hip import hip, hiprtc | ||||
| 
 | ||||
| from GPUSimulators.common import Timer | ||||
| from GPUSimulators.common import Timer, hip_check | ||||
| from GPUSimulators.common.utils import get_includes | ||||
| from GPUSimulators.gpu.context import Context | ||||
| 
 | ||||
| 
 | ||||
| @ -19,6 +20,7 @@ class HIPContext(Context): | ||||
|         Creates a new HIP context. | ||||
|         """ | ||||
|         super().__init__("hip", device, context_flags, use_cache, autotuning) | ||||
|         self.prog = {} | ||||
| 
 | ||||
|         # Log information about HIP version | ||||
|         self.logger.info(f"HIP Python version {hip_main.HIP_VERSION_NAME}") | ||||
| @ -26,11 +28,11 @@ class HIPContext(Context): | ||||
| 
 | ||||
|         # Device information | ||||
|         props = hip.hipDeviceProp_t() | ||||
|         self.__hip_check(hip.hipGetDeviceProperties(props, device)) | ||||
|         device_count = self.__hip_check(hip.hipGetDeviceCount()) | ||||
|         arch = props.gcnArchName | ||||
|         hip_check(hip.hipGetDeviceProperties(props, device)) | ||||
|         device_count = hip_check(hip.hipGetDeviceCount()) | ||||
|         self.arch = props.gcnArchName | ||||
|         self.logger.info( | ||||
|             f"Using device {device}/{device_count} '{props.name.decode('ascii')} ({arch.decode('ascii')})'" | ||||
|             f"Using device {device}/{device_count} '{props.name.decode()} ({self.arch.decode()})'" | ||||
|             + f" ({props.pciBusID})" | ||||
|         ) | ||||
|         self.logger.debug(f" => total available memory: {int(props.totalGlobalMem / pow(1024, 2))} MiB") | ||||
| @ -38,34 +40,29 @@ class HIPContext(Context): | ||||
|         if autotuning: | ||||
|             self.logger.info( | ||||
|                 "Autotuning enabled. It may take several minutes to run the code the first time: have patience") | ||||
|             raise (NotImplementedError("Autotuner is not yet implemented for HIP.")) | ||||
|             raise NotImplementedError("Autotuner is not yet implemented for HIP.") | ||||
|             # TODO Implement Autotuner for HIP | ||||
|             # self.autotuner = Autotuner.Autotuner() | ||||
| 
 | ||||
|     def __hip_check(self, call_request): | ||||
|         """ | ||||
|         Function that checks if the HIP function executed successfully. | ||||
|         """ | ||||
|     def __del__(self): | ||||
|         for module in self.modules.values(): | ||||
|             hip_check(hip.hipModuleUnload(module)) | ||||
| 
 | ||||
|         err = call_request[0] | ||||
|         result = call_request[1:] | ||||
|         if len(result) == 1: | ||||
|             result = result[0] | ||||
|         if isinstance(err, hip.hipError_t) and err != hip.hipError_t.hipSuccess: | ||||
|             self.logger.error(f"HIP Error: {str(err)}") | ||||
|             raise RuntimeError(str(err)) | ||||
|         return result | ||||
|         for prog in self.prog.values(): | ||||
|             hip_check(hiprtc.hiprtcDestroyProgram(prog.createRef())) | ||||
| 
 | ||||
|     def get_module(self, kernel_filename: str, | ||||
|                    include_dirs: dict=None, | ||||
|                    defines:dict[str: int]=None, | ||||
|                    compile_args:dict=None, | ||||
|                    jit_compile_args:dict=None): | ||||
|                    function: str, | ||||
|                    include_dirs: list[str] = None, | ||||
|                    defines: dict[str: int] = None, | ||||
|                    compile_args: dict[str: list] = None, | ||||
|                    jit_compile_args: dict = None): | ||||
|         """ | ||||
|         Reads a ``.hip`` file and creates a HIP kernel from that. | ||||
| 
 | ||||
|         Args: | ||||
|             kernel_filename: The file to use for the kernel. | ||||
|             function: The main function of the kernel. | ||||
|             include_dirs: List of directories for the ``#include``s referenced. | ||||
|             defines: Adds ``#define`` tags to the kernel, such as: ``#define key value``. | ||||
|             compile_args: Adds other compiler options; the list stored under the ``'hip'`` key is passed to ``hiprtc.hiprtcCompileProgram()``. | ||||
| @ -78,12 +75,17 @@ class HIPContext(Context): | ||||
|         if defines is None: | ||||
|             defines = {} | ||||
|         if include_dirs is None: | ||||
|             include_dirs = [] | ||||
|             include_dirs = [os.path.join(self.module_path, "include")] | ||||
|         if compile_args is None: | ||||
|             compile_args = {'no_extern_c': True} | ||||
|             compile_args = {'hip': []} | ||||
|         if jit_compile_args is None: | ||||
|             jit_compile_args = {} | ||||
| 
 | ||||
|         compile_args = compile_args.get('hip') | ||||
| 
 | ||||
|         compile_args = [bytes(arg, "utf-8")for arg in compile_args] | ||||
|         compile_args.append(b"--offload-arch=" + self.arch) | ||||
| 
 | ||||
|         def compile_message_handler(compile_success_bool, info_str, error_str): | ||||
|             self.logger.debug(f"Compilation success: {str(compile_success_bool)}") | ||||
|             if info_str: | ||||
| @ -91,7 +93,7 @@ class HIPContext(Context): | ||||
|             if error_str: | ||||
|                 self.logger.debug(f"Compilation error: {error_str}") | ||||
| 
 | ||||
|         kernel_filename = os.path.normpath("hip/" + kernel_filename + ".hip") | ||||
|         kernel_filename = os.path.normpath(kernel_filename + ".hip") | ||||
|         kernel_path = os.path.abspath(os.path.join(self.module_path, kernel_filename)) | ||||
| 
 | ||||
|         # Create a hash of the kernel options | ||||
| @ -107,22 +109,23 @@ class HIPContext(Context): | ||||
|         kernel_hash = root + "_" + source_hash + "_" + options_hash + ext | ||||
|         cached_kernel_filename = os.path.join(self.cache_path, kernel_hash) | ||||
| 
 | ||||
|         # Checking if the kernel is already in the hashmap | ||||
|         # Checks if the module is already cached in the hash map | ||||
|         if kernel_hash in self.modules.keys(): | ||||
|             self.logger.debug(f"Found kernel {kernel_filename} cached in hashmap ({kernel_hash})") | ||||
|             self.logger.debug(f"Found kernel {kernel_filename} cached in hashmap ({kernel_hash}).") | ||||
|             return self.modules[kernel_hash] | ||||
|         elif self.use_cache and os.path.isfile(cached_kernel_filename): | ||||
|             self.logger.debug(f"Found kernerl {kernel_filename} cached on disk ({kernel_hash})") | ||||
|             # Check if the cache is on the disk | ||||
|             self.logger.debug(f"Found kernel {kernel_filename} cached on disk ({kernel_hash}).") | ||||
| 
 | ||||
|             with io.open(cached_kernel_filename, "rb") as file: | ||||
|                 file_str = file.read() | ||||
|                 # TODO add ``module`` to HIP | ||||
|                 module = None | ||||
|                 code = file.read() | ||||
|                 module = hip_check(hip.hipModuleLoadData(code)) | ||||
| 
 | ||||
|             self.modules[kernel_hash] = module | ||||
|             return module | ||||
|         else: | ||||
|             self.logger.debug(f"Compiling {kernel_filename} ({kernel_hash})") | ||||
|             # As it was not found in the cache, compile it. | ||||
|             self.logger.debug(f"Compiling {kernel_filename} ({kernel_hash}) for {self.arch}.") | ||||
| 
 | ||||
|             # Create kernel string | ||||
|             kernel_string = "" | ||||
| @ -138,23 +141,40 @@ class HIPContext(Context): | ||||
|                     file.write(kernel_string) | ||||
| 
 | ||||
|             with Timer("compiler") as timer: | ||||
|                 import warnings | ||||
|                 with warnings.catch_warnings(): | ||||
|                     warnings.filterwarnings("ignore", message="The CUDA compiler succeeded, but said the following:\nkernel.cu", category=UserWarning) | ||||
|                     # TODO compile the binary file | ||||
|                     bin = None | ||||
|                 prog = hip_check( | ||||
|                     hiprtc.hiprtcCreateProgram(bytes(kernel_string, "utf-8"), bytes(function, "utf-8"), | ||||
|                                                0, [], [])) | ||||
| 
 | ||||
|                 err, = hiprtc.hiprtcCompileProgram(prog, len(compile_args), compile_args) | ||||
|                 if err != hiprtc.hiprtcResult.HIPRTC_SUCCESS: | ||||
|                     log_size = hip_check(hiprtc.hiprtcGetProgramLogSize(prog)) | ||||
|                     log = bytearray(log_size) | ||||
|                     hip_check(hiprtc.hiprtcGetProgramLog(prog, log)) | ||||
|                     raise RuntimeError(log.decode()) | ||||
| 
 | ||||
|                 code_size = hip_check(hiprtc.hiprtcGetCodeSize(prog)) | ||||
|                 code = bytearray(code_size) | ||||
|                 hip_check(hiprtc.hiprtcGetCode(prog, code)) | ||||
|                 module = hip_check(hip.hipModuleLoadData(code)) | ||||
| 
 | ||||
|                 # TODO get binary from buffer | ||||
|                 module = None | ||||
|                 if self.use_cache: | ||||
|                     with io.open(cached_kernel_filename, "wb") as file: | ||||
|                         file.write(bin) | ||||
|                         file.write(code) | ||||
| 
 | ||||
|             self.modules[kernel_hash] = module | ||||
|             self.prog[kernel_hash] = prog | ||||
|             return module | ||||
| 
 | ||||
|     def synchronize(self): | ||||
|         self.__hip_check(hip.hipDeviceSynchronize()) | ||||
|         hip_check(hip.hipDeviceSynchronize()) | ||||
| 
 | ||||
| 
 | ||||
| test = HIPContext() | ||||
| 
 | ||||
| test.get_module("SWE2D_HLL", | ||||
|                 "HLLKernel", | ||||
|                 defines={ | ||||
|                     'BLOCK_WIDTH': 8, | ||||
|                     'BLOCK_HEIGHT': 8 | ||||
|                 }, | ||||
|                 jit_compile_args={}) | ||||
|  | ||||
| @ -42,7 +42,8 @@ class EE2DKP07Dimsplit(BaseSimulator): | ||||
|                  theta=1.3, | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=8): | ||||
|                  block_width=16, block_height=8, | ||||
|                 compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
| 
 | ||||
| @ -75,13 +76,17 @@ class EE2DKP07Dimsplit(BaseSimulator): | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("EE2D_KP07_dimsplit", | ||||
|                                     "KP07DimsplitKernel", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"], | ||||
|                                         'cuda': { | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         }, | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07DimsplitKernel") | ||||
|  | ||||
| @ -72,13 +72,17 @@ class Force(BaseSimulator): | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("SWE2D_FORCE", | ||||
|                                     "FORCEKernel", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         'cuda': { | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         }, | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("FORCEKernel") | ||||
|  | ||||
| @ -71,13 +71,17 @@ class HLL(BaseSimulator): | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("SWE2D_HLL", | ||||
|                                     "HLLKernel", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         'cuda': { | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         }, | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("HLLKernel") | ||||
|  | ||||
| @ -73,13 +73,17 @@ class HLL2(BaseSimulator): | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("SWE2D_HLL2", | ||||
|                                     "HLL2Kernel", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         'cuda': { | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         }, | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("HLL2Kernel") | ||||
|  | ||||
| @ -80,13 +80,17 @@ class KP07(BaseSimulator): | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("SWE2D_KP07", | ||||
|                                     "KP07Kernel", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         'cuda': { | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         }, | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07Kernel") | ||||
|  | ||||
| @ -80,13 +80,17 @@ class KP07Dimsplit(BaseSimulator): | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("SWE2D_KP07_dimsplit", | ||||
|                                     "KP07DimsplitKernel", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         'cuda': { | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         }, | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07DimsplitKernel") | ||||
|  | ||||
| @ -72,13 +72,17 @@ class LxF(BaseSimulator): | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("SWE2D_LxF", | ||||
|                                     "LxFKernel", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         'cuda': { | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         }, | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("LxFKernel") | ||||
|  | ||||
| @ -72,13 +72,17 @@ class WAF(BaseSimulator): | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("SWE2D_WAF", | ||||
|                                     "WAFKernel", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         'cuda': { | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts, | ||||
|                                         }, | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("WAFKernel") | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Anthony Berg
						Anthony Berg