mirror of https://github.com/smyalygames/FiniteVolumeGPU.git (synced 2025-10-31 20:17:41 +01:00)
	feat(simulator): add autotuning for HIP

parent 716394f46b
commit ab6660d719
author Anthony Berg

@@ -26,12 +26,11 @@ import os
 from socket import gethostname
 
 import numpy as np
-import pycuda.driver as cuda
 from tqdm.auto import tqdm
 
 from GPUSimulators.simulator import BaseSimulator, BoundaryCondition
 from GPUSimulators.common import Timer
-from GPUSimulators.gpu import KernelContext
+from GPUSimulators.gpu import KernelContext, Event
 
 
 def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
@@ -51,8 +50,8 @@ def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
         return np.nan
 
     # Create timer events
-    start = cuda.Event()
-    end = cuda.Event()
+    start = Event()
+    end = Event()
 
     # Warmup
     for i in range(warmup_timesteps):
@@ -74,9 +73,9 @@
     # Sanity check solution
     h, hu, hv = sim.download()
     sane = True
-    sane = sane and sanity_check(0.3, 0.7)
-    sane = sane and sanity_check(-0.2, 0.2)
-    sane = sane and sanity_check(-0.2, 0.2)
+    sane = sane and sanity_check(h, 0.3, 0.7)
+    sane = sane and sanity_check(hu, -0.2, 0.2)
+    sane = sane and sanity_check(hv, -0.2, 0.2)
 
     if sane:
         logger.debug(f"{simulator.__name__} [{arguments["block_width"]} x {arguments["block_height"]}] succeeded: "
@@ -170,7 +169,7 @@ def benchmark_single_simulator(simulator, arguments, block_widths, block_heights
             sim_arguments.update({'block_height': block_height})
             for i, block_width in enumerate(tqdm(block_widths, desc=f'Iteration {j} Progress', leave=False)):
                 sim_arguments.update({'block_width': block_width})
-                megacells[j, i] = run_benchmark(sim_arguments)
+                megacells[j, i] = run_benchmark(simulator, sim_arguments)
 
     logger.debug("Completed %s in %f seconds", simulator.__name__, t.secs)
 
@@ -207,14 +206,14 @@ class Autotuner:
         # Set arguments to send to the simulators during construction
         context = KernelContext(autotuning=False)
         g = 9.81
-        h0, hu0, hv0, dx, dy, dt = gen_test_data(ny=self.ny, g=g)
+        h0, hu0, hv0, dx, dy, dt = gen_test_data(nx=self.nx, ny=self.ny, g=g)
         arguments = {
             'context': context,
             'h0': h0, 'hu0': hu0, 'hv0': hv0,
             'nx': self.nx, 'ny': self.ny,
             'dx': dx, 'dy': dy, 'dt': 0.9 * dt,
             'g': g,
-            'compile_opts': ['-Wno-deprecated-gpu-targets']
+            'compile_opts': []
         }
 
         # Load existing data into memory
@@ -227,7 +226,7 @@ class Autotuner:
                     benchmark_data[k] = v
 
         # Run benchmark
-        benchmark_data[key + "_megacells"] = benchmark_single_simulator(arguments, self.block_widths,
+        benchmark_data[key + "_megacells"] = benchmark_single_simulator(simulator, arguments, self.block_widths,
                                                                         self.block_heights)
         benchmark_data[key + "_block_widths"] = self.block_widths
         benchmark_data[key + "_block_heights"] = self.block_heights
@@ -268,9 +267,9 @@ class Autotuner:
                     self.benchmark(simulator)
                     data = np.load(self.filename)
 
-                def find_max_index(megacells):
-                    max_index = np.nanargmax(megacells)
-                    return np.unravel_index(max_index, megacells.shape)
+                def find_max_index(megacells_arg):
+                    max_index = np.nanargmax(megacells_arg)
+                    return np.unravel_index(max_index, megacells_arg.shape)
 
                 megacells = data[key + '_megacells']
                 block_widths = data[key + '_block_widths']
@@ -282,7 +281,3 @@ class Autotuner:
                                          "megacells": megacells[j, i]}
                 logger.debug(f"Returning {self.performance[key]} as peak performance parameters")
                 return self.performance[key]
-
-            # This should never happen
-            raise "Something wrong: Could not get autotuning data!"
-            return None
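
A minimal sketch of how the backend-neutral Event objects created in run_benchmark would typically time the measured timesteps and yield the megacells figure; the per-step call, the stream attribute, and the arithmetic below are assumptions, since those lines fall outside the hunks shown above.

from GPUSimulators.gpu import Event

nx, ny = arguments['nx'], arguments['ny']    # grid size passed to the simulator
start = Event()
end = Event()

start.record(sim.stream)                     # sim.stream is a hypothetical attribute name
for _ in range(timesteps):
    sim.substep(arguments['dt'], 0)          # hypothetical per-timestep call
end.record(sim.stream)
end.synchronize()

elapsed_ms = end.time_since(start)           # both backends report milliseconds
megacells = nx * ny * timesteps / (elapsed_ms * 1e3)   # cells per second divided by 1e6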

@@ -5,6 +5,8 @@ __env_name = 'GPU_LANG'
 if __env_name in environ and environ.get(__env_name).lower() == "cuda":
     from .cuda_context import CudaContext as KernelContext
     from .cuda_handler import CudaHandler as GPUHandler
+    from .cuda_event import CudaEvent as Event
 else:
     from .hip_context import HIPContext as KernelContext
     from .hip_handler import HIPHandler as GPUHandler
+    from .hip_event import HIPEvent as Event
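
The Event alias above is resolved once, at import time, from the GPU_LANG environment variable, alongside KernelContext and GPUHandler. A minimal sketch of selecting the CUDA implementations (assuming GPUSimulators is importable; any other value, or an unset variable, falls through to the HIP branch):

import os

os.environ["GPU_LANG"] = "cuda"   # must be set before GPUSimulators.gpu is first imported

from GPUSimulators.gpu import KernelContext, Event

context = KernelContext(autotuning=False)   # CudaContext here, HIPContext otherwise
timer = Event()                             # CudaEvent here, HIPEvent otherwise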

@@ -28,7 +28,7 @@ import pycuda.compiler as cuda_compiler
 import pycuda.gpuarray
 import pycuda.driver as cuda
 
-from GPUSimulators import Autotuner
+from GPUSimulators.Autotuner import Autotuner
 from GPUSimulators.common import Timer
 from GPUSimulators.gpu.context import Context
 
@@ -79,7 +79,7 @@ class CudaContext(Context):
         if autotuning:
             self.logger.info(
                 "Autotuning enabled. It may take several minutes to run the code the first time: have patience")
-            self.autotuner = Autotuner.Autotuner()
+            self.autotuner = Autotuner()
 
     def __del__(self, *args):
         self.logger.info(f"Cleaning up CUDA context handle <{str(self.cuda_context.handle)}>")

GPUSimulators/gpu/cuda_event.py (new file, 43 lines)
@@ -0,0 +1,43 @@
+import pycuda.driver as cuda
+
+from .event import BaseEvent
+
+
+class CudaEvent(BaseEvent):
+    """
+    A GPU Event handler.
+    """
+
+    def __init__(self):
+        """
+        Creates a GPU Event.
+        """
+        super().__init__()
+        self.event = cuda.Event()
+
+    def record(self, stream):
+        """
+        Insert a recording point into the ``stream``.
+
+        Args:
+            stream: The stream to insert the recording point into.
+        """
+        self.event.record(stream)
+
+    def synchronize(self):
+        """
+        Wait for the event to complete.
+        """
+        self.event.synchronize()
+
+    def time_since(self, start):
+        """
+        Return the elapsed time from the ``start`` event and this class.
+
+        Args:
+            start: The Event to measure time from.
+
+        Returns:
+            Time since the ``start`` event and the end time of this class.
+        """
+        return self.event.time_since(start)

GPUSimulators/gpu/event.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+class BaseEvent(object):
+    """
+    A GPU Event handler.
+    """
+
+    def __init__(self):
+        """
+        Creates a GPU Event.
+        """
+
+    def record(self, stream):
+        """
+        Insert a recording point into the ``stream``.
+
+        Args:
+            stream: The stream to insert the recording point into.
+        """
+        raise NotImplementedError("This function needs to be implemented in a subclass.")
+
+    def synchronize(self):
+        """
+        Wait for the event to complete.
+        """
+        raise NotImplementedError("This function needs to be implemented in a subclass.")
+
+    def time_since(self, start) -> float:
+        """
+        Return the elapsed time from the ``start`` event and this class.
+
+        Args:
+            start: The Event to measure time from.
+
+        Returns:
+            Time since the ``start`` event and the end time of this class.
+        """
+        raise NotImplementedError("This function needs to be implemented in a subclass.")
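
BaseEvent only fixes the record/synchronize/time_since interface; each backend supplies the actual implementation. Purely as an illustration of that contract (not part of the commit), a hypothetical host-side subclass could look like this, returning milliseconds to match the GPU events:

import time

from GPUSimulators.gpu.event import BaseEvent


class HostEvent(BaseEvent):
    """Hypothetical CPU wall-clock event, shown only to illustrate the interface."""

    def __init__(self):
        super().__init__()
        self.timestamp = None

    def record(self, stream):
        # A host timer has nothing to enqueue, so the stream argument is ignored.
        self.timestamp = time.perf_counter()

    def synchronize(self):
        # Nothing asynchronous to wait for on the host.
        pass

    def time_since(self, start) -> float:
        # Milliseconds, to match CudaEvent and HIPEvent.
        return (self.timestamp - start.timestamp) * 1e3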

@@ -37,11 +37,10 @@ class HIPContext(Context):
         self.logger.debug(f" => total available memory: {int(props.totalGlobalMem / pow(1024, 2))} MiB")
 
         if autotuning:
+            from GPUSimulators.Autotuner import Autotuner
             self.logger.info(
                 "Autotuning enabled. It may take several minutes to run the code the first time: have patience")
-            raise NotImplementedError("Autotuner is not yet implemented for HIP.")
-            # TODO Implement Autotuner for HIP
-            # self.autotuner = Autotuner.Autotuner()
+            self.autotuner = Autotuner()
 
     def __del__(self):
         for module in self.modules.values():
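
With the NotImplementedError removed, autotuning can now be requested on the HIP side in the same way as on the CUDA side. A minimal sketch (only the autotuner attribute comes from the diff; how it is queried afterwards depends on the Autotuner class):

from GPUSimulators.gpu import KernelContext   # resolves to HIPContext when GPU_LANG is not "cuda"

# The first run benchmarks block sizes and caches the results, so it can take several minutes.
context = KernelContext(autotuning=True)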
							
								
								
									
GPUSimulators/gpu/hip_event.py (new file, 51 lines)
@@ -0,0 +1,51 @@
+from hip import hip
+from hip.hip import ihipStream_t, ihipEvent_t
+
+from .event import BaseEvent
+from GPUSimulators.common import hip_check
+
+
+class HIPEvent(BaseEvent):
+    """
+    A GPU Event handler.
+    """
+
+    def __init__(self):
+        """
+        Creates a GPU Event.
+        """
+        super().__init__()
+        self.event = hip_check(hip.hipEventCreate())
+
+    def __del__(self):
+        hip_check(hip.hipEventDestroy(self.event))
+
+    def record(self, stream: ihipStream_t | object):
+        """
+        Insert a recording point into the ``stream``.
+
+        Args:
+            stream: The stream to insert the recording point into.
+        """
+        hip_check(hip.hipEventRecord(self.event, stream))
+
+    def synchronize(self):
+        """
+        Wait for the event to complete.
+        """
+        hip_check(hip.hipEventSynchronize(self.event))
+
+    def time_since(self, start: ihipEvent_t | object):
+        """
+        Return the elapsed time from the ``start`` event and this class.
+
+        Args:
+            start: The Event to measure time from. Can also use the HIPEvent class instead of obj.event.
+
+        Returns:
+            Time since the ``start`` event and the end time of this class.
+        """
+        if isinstance(start, HIPEvent):
+            start = start.event
+
+        return hip_check(hip.hipEventElapsedTime(start, self.event))
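
A short stand-alone usage sketch for HIPEvent (assuming the hip-python bindings are installed and a default device is available; the stream handling is illustrative and not taken from the commit):

from hip import hip

from GPUSimulators.common import hip_check
from GPUSimulators.gpu.hip_event import HIPEvent

stream = hip_check(hip.hipStreamCreate())

start, end = HIPEvent(), HIPEvent()
start.record(stream)
# ... enqueue kernels or copies on `stream` here ...
end.record(stream)
end.synchronize()
print(f"elapsed: {end.time_since(start):.3f} ms")   # hipEventElapsedTime reports milliseconds

hip_check(hip.hipStreamDestroy(stream))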
							
								
								
									
HIPTestSchemes.ipynb (2494 changed lines)
File diff suppressed because one or more lines are too long