mirror of
				https://github.com/smyalygames/FiniteVolumeGPU.git
				synced 2025-10-31 20:17:41 +01:00 
			
		
		
		
	feat: add handlers to run models
This commit is contained in:
		
							parent
							
								
									dc40972878
								
							
						
					
					
						commit
						78564e1186
					
				| @ -24,7 +24,6 @@ import gc | ||||
| 
 | ||||
| from IPython.core import magic_arguments | ||||
| from IPython.core.magic import line_magic, Magics, magics_class | ||||
| import pycuda.driver as cuda | ||||
| 
 | ||||
| from GPUSimulators.common import IPEngine | ||||
| from GPUSimulators.gpu import KernelContext | ||||
| @ -43,14 +42,15 @@ class MagicCudaContext(Magics): | ||||
|     @magic_arguments.argument( | ||||
|         '--no_autotuning', '-na', action="store_true", help='Disable autotuning of kernels') | ||||
|     def cuda_context_handler(self, line): | ||||
|         # import pycuda.driver as cuda | ||||
|         args = magic_arguments.parse_argstring(self.cuda_context_handler, line) | ||||
|         self.logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|         self.logger.info(f"Registering {args.name} in user workspace") | ||||
| 
 | ||||
|         context_flags = None | ||||
|         if args.blocking: | ||||
|             context_flags = cuda.ctx_flags.SCHED_BLOCKING_SYNC | ||||
|         # if args.blocking: | ||||
|         #     context_flags = cuda.ctx_flags.SCHED_BLOCKING_SYNC | ||||
| 
 | ||||
|         if args.name in self.shell.user_ns.keys(): | ||||
|             self.logger.debug("Context already registered! Ignoring") | ||||
| @ -65,19 +65,19 @@ class MagicCudaContext(Magics): | ||||
|         # this function will be called on exceptions in any cell | ||||
|         def custom_exc(shell, etype, evalue, tb, tb_offset=None): | ||||
|             self.logger.exception(f"Exception caught: Resetting to CUDA context {args.name}") | ||||
|             while cuda.Context.get_current() is not None: | ||||
|                 context = cuda.Context.get_current() | ||||
|                 self.logger.info(f"Popping <{str(context.handle)}>") | ||||
|                 cuda.Context.pop() | ||||
|             # while cuda.Context.get_current() is not None: | ||||
|             #     context = cuda.Context.get_current() | ||||
|             #     self.logger.info(f"Popping <{str(context.handle)}>") | ||||
|             #     cuda.Context.pop() | ||||
| 
 | ||||
|             if args.name in self.shell.user_ns.keys(): | ||||
|                 self.logger.info(f"Pushing <{str(self.shell.user_ns[args.name].cuda_context.handle)}>") | ||||
|                 self.shell.user_ns[args.name].cuda_context.push() | ||||
|             else: | ||||
|                 self.logger.error(f"No CUDA context called {args.name} found (something is wrong)") | ||||
|                 self.logger.error("CUDA will not work now") | ||||
| 
 | ||||
|             self.logger.debug("==================================================================") | ||||
|             # if args.name in self.shell.user_ns.keys(): | ||||
|             #     self.logger.info(f"Pushing <{str(self.shell.user_ns[args.name].context.handle)}>") | ||||
|             #     self.shell.user_ns[args.name].context.push() | ||||
|             # else: | ||||
|             #     self.logger.error(f"No CUDA context called {args.name} found (something is wrong)") | ||||
|             #     self.logger.error("CUDA will not work now") | ||||
|             # | ||||
|             # self.logger.debug("==================================================================") | ||||
| 
 | ||||
|             # still show the error within the notebook, don't just swallow it | ||||
|             shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset) | ||||
| @ -89,10 +89,10 @@ class MagicCudaContext(Magics): | ||||
|         import atexit | ||||
|         def exitfunc(): | ||||
|             self.logger.info("Exitfunc: Resetting CUDA context stack") | ||||
|             while cuda.Context.get_current() is not None: | ||||
|                 context = cuda.Context.get_current() | ||||
|                 self.logger.info(f"`-> Popping <{str(context.handle)}>") | ||||
|                 cuda.Context.pop() | ||||
|             # while cuda.Context.get_current() is not None: | ||||
|             #     context = cuda.Context.get_current() | ||||
|             #     self.logger.info(f"`-> Popping <{str(context.handle)}>") | ||||
|             #     cuda.Context.pop() | ||||
|             self.logger.debug("==================================================================") | ||||
| 
 | ||||
|         atexit.register(exitfunc) | ||||
|  | ||||
| @ -4,5 +4,7 @@ __env_name = 'GPU_LANG' | ||||
| 
 | ||||
| if __env_name in environ and environ.get(__env_name).lower() == "cuda": | ||||
|     from .cuda_context import CudaContext as KernelContext | ||||
|     from .cuda_handler import CudaHandler as GPUHandler | ||||
| else: | ||||
|     from .hip_context import HIPContext as KernelContext | ||||
|     from .hip_context import HIPContext as KernelContext | ||||
|     from .hip_handler import HIPHandler as GPUHandler | ||||
							
								
								
									
										30
									
								
								GPUSimulators/gpu/cuda_handler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								GPUSimulators/gpu/cuda_handler.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,30 @@ | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.gpu.handler import BaseGPUHandler | ||||
| from GPUSimulators.gpu import KernelContext | ||||
| 
 | ||||
| 
 | ||||
class CudaHandler(BaseGPUHandler):
    """
    PyCUDA-backed implementation of the GPU handler.

    Looks up and prepares one kernel function from a compiled module and owns
    a float32 ``GPUArray`` (``cfl_data``) with the shape of the launch grid.
    """

    def __init__(self, context: KernelContext, module, function, arguments,
                 grid_size):
        """
        Create a new CUDA handler.

        Args:
            context: The KernelContext the module was created from.
            module: The module that contains the kernel function.
            function: Name of the kernel function to look up in ``module``.
            arguments: Argument type string handed to the kernel's ``prepare``.
            grid_size: Shape of the ``cfl_data`` array allocated on the device.
        """
        super().__init__(context, module, function, arguments, grid_size)

        # Kept for the (currently disabled) argument count check in prepared_call.
        self.arguments = arguments

        kernel = module.get_function(function)
        kernel.prepare(arguments)
        self.kernel = kernel

        self.cfl_data = gpuarray.GPUArray(grid_size, dtype=np.float32)

    def prepared_call(self, grid_size, block_size, stream, args: list):
        """
        Launch the prepared kernel asynchronously on ``stream``.

        Args:
            grid_size: The grid size to launch the kernel with.
            block_size: The block size to launch the kernel with.
            stream: The stream the launch is enqueued on.
            args: Kernel parameters; unpacked positionally into the call.
        """
        # NOTE: ``args`` is not validated against ``self.arguments``; the
        # length check that used to live here is disabled.
        launch = self.kernel.prepared_async_call
        launch(grid_size, block_size, stream, *args)

    def array_fill(self, data, stream):
        """
        Fill every element of ``cfl_data`` with ``data``.

        Args:
            data: The value to write into the array.
            stream: The stream the fill is enqueued on.
        """
        self.cfl_data.fill(data, stream=stream)

    def array_min(self, stream):
        """
        Reduce ``cfl_data`` to its minimum and copy the result to the host.

        Args:
            stream: The stream the reduction is enqueued on.

        Returns:
            The minimum of ``cfl_data`` as returned by ``GPUArray.get()``.
        """
        minimum = gpuarray.min(self.cfl_data, stream=stream)
        return minimum.get()
							
								
								
									
										54
									
								
								GPUSimulators/gpu/handler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										54
									
								
								GPUSimulators/gpu/handler.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,54 @@ | ||||
| from GPUSimulators.gpu import KernelContext | ||||
| 
 | ||||
| 
 | ||||
class BaseGPUHandler:
    """
    Common interface for objects that make GPU calls.

    Backend-specific subclasses implement ``prepared_call``, ``array_fill``
    and ``array_min``; the versions here only raise ``NotImplementedError``.
    """

    # Shared message for every method a subclass has to provide.
    _ABSTRACT_MSG = "This function needs to be implemented in a subclass."

    def __init__(self, context: KernelContext, module, function: str, arguments: str,
                 grid_size: tuple[int, int]):
        """
        Create a new GPU handler.

        The base initializer stores nothing; it only fixes the constructor
        signature that subclasses share.

        Args:
            context: The KernelContext that is used to make the calls to the kernel.
            module: The module created from KernelContext for a function.
            function: Name of the function to use in the kernel.
            arguments: A string of the argument types to pass to the kernel.
            grid_size: The size of the array for the data of the simulation.
        """

    def prepared_call(self, grid_size, block_size: tuple[int, int, int], stream, args: list):
        """
        Call the kernel function this handler was initialized with on the GPU.

        Args:
            grid_size: The size of the grid to do the computation of.
            block_size: The block size, as a tuple.
            stream: The GPU data stream.
            args: Parameters to be passed into the GPU kernel.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(self._ABSTRACT_MSG)

    def array_fill(self, data: float, stream):
        """
        Fill the entire array held by the handler with a single value.

        Args:
            data: The value to fill the array with.
            stream: The GPU data stream.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(self._ABSTRACT_MSG)

    def array_min(self, stream) -> float:
        """
        Get the minimum value in the array stored in the handler.

        Args:
            stream: The GPU data stream.

        Returns:
            The minimum value in the stored array as a float.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError(self._ABSTRACT_MSG)
							
								
								
									
										70
									
								
								GPUSimulators/gpu/hip_handler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								GPUSimulators/gpu/hip_handler.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,70 @@ | ||||
| import ctypes | ||||
| 
 | ||||
| import numpy as np | ||||
| from hip import hip, hipblas | ||||
| 
 | ||||
| from GPUSimulators.common import hip_check | ||||
| from GPUSimulators.gpu.handler import BaseGPUHandler | ||||
| from GPUSimulators.gpu import KernelContext | ||||
| 
 | ||||
| 
 | ||||
class HIPHandler(BaseGPUHandler):
    """
    HIP implementation of the GPU handler.

    Looks up one kernel function in a loaded module and owns a float32 device
    buffer (``cfl_data``) of shape ``grid_size`` together with a host array of
    the same shape (``cfl_data_h``) that is used to stage fills.
    """

    def __init__(self, context: KernelContext, module, function, arguments,
                 grid_size):
        """
        Create a new HIP handler.

        Args:
            context: The KernelContext the module was created from.
            module: The module that contains the kernel function.
            function: Name of the kernel function to look up in ``module``.
            arguments: Argument type string; only forwarded to the base class.
            grid_size: Shape of the ``cfl_data`` buffer allocated on the device.
        """
        # Defined first so __del__ is safe even if a later HIP call raises.
        self.cfl_data = None

        super().__init__(context, module, function, arguments, grid_size)

        self.kernel = hip_check(hip.hipModuleGetFunction(module, bytes(function, "utf-8")))
        self.context = context

        self.dtype = np.float32
        self.cfl_data_h = np.empty(grid_size, dtype=self.dtype)

        self.num_bytes = self.cfl_data_h.size * self.cfl_data_h.itemsize
        self.cfl_data = hip_check(hip.hipMalloc(self.num_bytes)).configure(
            typestr=np.finfo(self.dtype).dtype.name, shape=grid_size
        )

    def __del__(self):
        """Free the device buffer, if one was allocated."""
        cfl_data = getattr(self, "cfl_data", None)
        if cfl_data is None:
            return

        # Drop the reference first so the buffer can never be freed twice.
        self.cfl_data = None
        hip_check(hip.hipFree(cfl_data))

    @staticmethod
    def _pad_dim3(size):
        """Return ``size`` as a tuple padded with trailing ones to three entries."""
        size = tuple(size)
        return size + (1,) * (3 - len(size))

    @staticmethod
    def _to_ctype(value):
        """Wrap Python/NumPy ints and floats in ctypes scalars; pass the rest through."""
        if isinstance(value, (int, np.int32)):
            return ctypes.c_int(value)
        if isinstance(value, (float, np.float32)):
            return ctypes.c_float(value)
        return value

    def prepared_call(self, grid_size, block_size, stream, args):
        """
        Launch the kernel on ``stream``.

        Args:
            grid_size: The grid size; padded with ones up to three dimensions.
            block_size: The block size; padded with ones up to three dimensions.
            stream: The stream the launch is enqueued on.
            args: Kernel parameters. Ints and floats are converted to
                ``c_int``/``c_float``; everything else is passed as is.
                The sequence itself is left unmodified.
        """
        # Build a new tuple instead of rewriting the caller's list in place.
        kernel_args = tuple(self._to_ctype(arg) for arg in args)

        hip_check(hip.hipModuleLaunchKernel(
            self.kernel,
            *self._pad_dim3(grid_size),
            *self._pad_dim3(block_size),
            0,
            stream,
            None,
            kernel_args
        ))

    def array_fill(self, data, stream):
        """
        Fill every element of ``cfl_data`` with ``data``.

        The value is written into the host staging array, which is then
        copied to the device on ``stream``.

        Args:
            data: The value to write into the array.
            stream: The stream the copy is enqueued on.
        """
        self.cfl_data_h.fill(data)

        hip_check(
            hip.hipMemcpyAsync(self.cfl_data, self.cfl_data_h, self.num_bytes, hip.hipMemcpyKind.hipMemcpyHostToDevice,
                               stream))

    def array_min(self, stream):
        """
        Get the minimum value stored in ``cfl_data``.

        The buffer is copied to the host on ``stream`` and reduced there.
        hipblasIsamin is not used because it yields the (1-based) index of the
        element with the smallest absolute value rather than the value itself.

        Args:
            stream: The stream the copy is enqueued on; it is synchronized
                before the host-side reduction.

        Returns:
            The minimum value in ``cfl_data`` as a ``numpy.float32``.
        """
        # Separate buffer so the staging array used by array_fill is untouched.
        host = np.empty_like(self.cfl_data_h)

        hip_check(
            hip.hipMemcpyAsync(host, self.cfl_data, self.num_bytes, hip.hipMemcpyKind.hipMemcpyDeviceToHost,
                               stream))
        # The copy is asynchronous; wait for it before reading ``host``.
        hip_check(hip.hipStreamSynchronize(stream))

        return host.min()
| @ -21,10 +21,10 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.simulator import BaseSimulator, BoundaryCondition | ||||
| from GPUSimulators.gpu import GPUHandler | ||||
| 
 | ||||
| 
 | ||||
| class EE2DKP07Dimsplit(BaseSimulator): | ||||
| @ -43,7 +43,7 @@ class EE2DKP07Dimsplit(BaseSimulator): | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=8, | ||||
|                 compile_opts: list[str] = []): | ||||
|                  compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
| 
 | ||||
| @ -89,8 +89,8 @@ class EE2DKP07Dimsplit(BaseSimulator): | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07DimsplitKernel") | ||||
|         self.kernel.prepare("iiffffffiiPiPiPiPiPiPiPiPiPiiii") | ||||
|         self.handler = GPUHandler(context, module, "KP07DimsplitKernel", "iiffffffiiPiPiPiPiPiPiPiPiPiiii", | ||||
|                                   self.grid_size) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
| @ -101,11 +101,11 @@ class EE2DKP07Dimsplit(BaseSimulator): | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [None, None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         dt_x = np.min(self.dx / (np.abs(rho_u / rho) + np.sqrt(gamma * rho))) | ||||
|         dt_y = np.min(self.dy / (np.abs(rho_v / rho) + np.sqrt(gamma * rho))) | ||||
|         self.dt = min(dt_x, dt_y) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|         self.handler.array_fill(self.dt, self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number, external=True, internal=True): | ||||
|         self.substep_dimsplit(0.5 * dt, step_number, external, internal) | ||||
| @ -114,25 +114,25 @@ class EE2DKP07Dimsplit(BaseSimulator): | ||||
|         if external and internal: | ||||
|             # print("COMPLETE DOMAIN (dt=" + str(dt) + ")") | ||||
| 
 | ||||
|             self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             0, 0, | ||||
|                                             self.nx, self.ny) | ||||
|             self.handler.prepared_call(self.grid_size, self.block_size, self.stream, | ||||
|                                        [self.nx, self.ny, | ||||
|                                        self.dx, self.dy, dt, | ||||
|                                        self.g, | ||||
|                                        self.gamma, | ||||
|                                        self.theta, | ||||
|                                        substep, | ||||
|                                        self.boundary_conditions, | ||||
|                                        self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                        self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                        self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                        self.u0[3].data, self.u0[3].get_strides()[0], | ||||
|                                        self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                        self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                        self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                        self.u1[3].data, self.u1[3].get_strides()[0], | ||||
|                                        self.handler.cfl_data, | ||||
|                                        0, 0, | ||||
|                                        self.nx, self.ny]) | ||||
|             return | ||||
| 
 | ||||
|         if external and not internal: | ||||
| @ -145,121 +145,121 @@ class EE2DKP07Dimsplit(BaseSimulator): | ||||
|             # NORTH | ||||
|             # (x0, y0) x (x1, y1) | ||||
|             #  (0, ny-y_halo) x (nx, ny) | ||||
|             self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|             self.handler.prepared_call(ns_grid_size, self.block_size, self.stream, | ||||
|                                             [self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                             self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                             self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                             self.u0[3].data, self.u0[3].get_strides()[0], | ||||
|                                             self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                             self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                             self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                             self.u1[3].data, self.u1[3].get_strides()[0], | ||||
|                                             self.handler.cfl_data, | ||||
|                                             0, self.ny - int(self.u0[0].y_halo), | ||||
|                                             self.nx, self.ny) | ||||
|                                             self.nx, self.ny]) | ||||
| 
 | ||||
|             # SOUTH | ||||
|             # (x0, y0) x (x1, y1) | ||||
|             #   (0, 0) x (nx, y_halo) | ||||
|             self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|             self.handler.prepared_call(ns_grid_size, self.block_size, self.stream, | ||||
|                                             [self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                             self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                             self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                             self.u0[3].data, self.u0[3].get_strides()[0], | ||||
|                                             self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                             self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                             self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                             self.u1[3].data, self.u1[3].get_strides()[0], | ||||
|                                             self.handler.cfl_data, | ||||
|                                             0, 0, | ||||
|                                             self.nx, int(self.u0[0].y_halo)) | ||||
|                                             self.nx, int(self.u0[0].y_halo)]) | ||||
| 
 | ||||
|             we_grid_size = (1, self.grid_size[1]) | ||||
| 
 | ||||
|             # WEST | ||||
|             # (x0, y0) x (x1, y1) | ||||
|             #  (0, 0) x (x_halo, ny) | ||||
|             self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|             self.handler.prepared_call(we_grid_size, self.block_size, self.stream, | ||||
|                                             [self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                             self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                             self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                             self.u0[3].data, self.u0[3].get_strides()[0], | ||||
|                                             self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                             self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                             self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                             self.u1[3].data, self.u1[3].get_strides()[0], | ||||
|                                             self.handler.cfl_data, | ||||
|                                             0, 0, | ||||
|                                             int(self.u0[0].x_halo), self.ny) | ||||
|                                             int(self.u0[0].x_halo), self.ny]) | ||||
| 
 | ||||
|             # EAST | ||||
|             # (x0, y0) x (x1, y1) | ||||
|             #   (nx-x_halo, 0) x (nx, ny) | ||||
|             self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|             self.handler.prepared_call(we_grid_size, self.block_size, self.stream, | ||||
|                                             [self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                             self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                             self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                             self.u0[3].data, self.u0[3].get_strides()[0], | ||||
|                                             self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                             self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                             self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                             self.u1[3].data, self.u1[3].get_strides()[0], | ||||
|                                             self.handler.cfl_data, | ||||
|                                             self.nx - int(self.u0[0].x_halo), 0, | ||||
|                                             self.nx, self.ny) | ||||
|                                             self.nx, self.ny]) | ||||
|             return | ||||
| 
 | ||||
|         if internal and not external: | ||||
|             # INTERNAL DOMAIN | ||||
|             #         (x0, y0) x (x1, y1) | ||||
|             # (x_halo, y_halo) x (nx - x_halo, ny - y_halo) | ||||
|             self.kernel.prepared_async_call(self.grid_size, self.block_size, self.internal_stream, | ||||
|                                             self.nx, self.ny, | ||||
|             self.handler.prepared_call(self.grid_size, self.block_size, self.internal_stream, | ||||
|                                             [self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                             self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                             self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                             self.u0[3].data, self.u0[3].get_strides()[0], | ||||
|                                             self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                             self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                             self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                             self.u1[3].data, self.u1[3].get_strides()[0], | ||||
|                                             self.handler.cfl_data, | ||||
|                                             int(self.u0[0].x_halo), int(self.u0[0].y_halo), | ||||
|                                             self.nx - int(self.u0[0].x_halo), self.ny - int(self.u0[0].y_halo)) | ||||
|                                             self.nx - int(self.u0[0].x_halo), self.ny - int(self.u0[0].y_halo)]) | ||||
|             return | ||||
| 
 | ||||
|     def swap_buffers(self): | ||||
| @ -275,5 +275,5 @@ class EE2DKP07Dimsplit(BaseSimulator): | ||||
|         return | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         max_dt = self.handler.array_min(stream=self.stream) | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
| @ -22,10 +22,10 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.simulator import BaseSimulator, BoundaryCondition | ||||
| from GPUSimulators.gpu import GPUHandler | ||||
| 
 | ||||
| 
 | ||||
| class Force(BaseSimulator): | ||||
| @ -85,8 +85,7 @@ class Force(BaseSimulator): | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("FORCEKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii") | ||||
|         self.handler = GPUHandler(context, module, "FORCEKernel", "iiffffiPiPiPiPiPiPiPiiii", self.grid_size) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
| @ -97,7 +96,6 @@ class Force(BaseSimulator): | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
| @ -106,23 +104,23 @@ class Force(BaseSimulator): | ||||
|         else: | ||||
|             self.dt = dt | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|         self.handler.array_fill(self.dt, self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|         self.handler.prepared_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         [self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                         self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                         self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                         self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                         self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                         self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                         self.handler.cfl_data, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|                                         self.nx, self.ny]) | ||||
| 
 | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
| @ -134,5 +132,5 @@ class Force(BaseSimulator): | ||||
|         self.u1.check() | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         max_dt = self.handler.array_min(self.stream) | ||||
|         return max_dt | ||||
|  | ||||
| @ -18,13 +18,15 @@ GNU General Public License for more details. | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| import ctypes | ||||
| import time | ||||
| 
 | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.common.arrays import ArakawaA2D | ||||
| from GPUSimulators.simulator import BaseSimulator, BoundaryCondition | ||||
| from GPUSimulators.gpu import GPUHandler | ||||
| 
 | ||||
| 
 | ||||
| class HLL(BaseSimulator): | ||||
| @ -84,8 +86,7 @@ class HLL(BaseSimulator): | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("HLLKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii") | ||||
|         self.handler = GPUHandler(context, module, "HLLKernel", "iiffffiPiPiPiPiPiPiPiiii", self.grid_size) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
| @ -96,7 +97,6 @@ class HLL(BaseSimulator): | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0))) | ||||
| @ -104,23 +104,23 @@ class HLL(BaseSimulator): | ||||
|         else: | ||||
|             self.dt = dt | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|         self.handler.array_fill(self.dt, self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|         self.handler.prepared_call(self.grid_size, self.block_size, self.stream, | ||||
|                                    [self.nx, self.ny, | ||||
|                                    self.dx, self.dy, dt, | ||||
|                                    self.g, | ||||
|                                    self.boundary_conditions, | ||||
|                                    self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                    self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                    self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                    self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                    self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                    self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                    self.handler.cfl_data, | ||||
|                                    0, 0, | ||||
|                                    self.nx, self.ny]) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def get_output(self): | ||||
| @ -131,5 +131,5 @@ class HLL(BaseSimulator): | ||||
|         self.u1.check() | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         max_dt = self.handler.array_min(self.stream) | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
| @ -21,10 +21,10 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.simulator import BaseSimulator, BoundaryCondition | ||||
| from GPUSimulators.gpu import GPUHandler | ||||
| 
 | ||||
| 
 | ||||
| class HLL2(BaseSimulator): | ||||
| @ -86,8 +86,7 @@ class HLL2(BaseSimulator): | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("HLL2Kernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii") | ||||
|         self.handler = GPUHandler(context, module, "HLL2Kernel", "iifffffiiPiPiPiPiPiPiPiiii", self.grid_size) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
| @ -98,7 +97,6 @@ class HLL2(BaseSimulator): | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
| @ -107,28 +105,28 @@ class HLL2(BaseSimulator): | ||||
|         else: | ||||
|             self.dt = dt | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|         self.handler.array_fill(self.dt, self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.substep_dimsplit(dt * 0.5, step_number) | ||||
| 
 | ||||
|     def substep_dimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|         self.handler.prepared_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         [self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.theta, | ||||
|                                         substep, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                         self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                         self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                         self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                         self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                         self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                         self.handler.cfl_data, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|                                         self.nx, self.ny]) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def get_output(self): | ||||
| @ -139,5 +137,5 @@ class HLL2(BaseSimulator): | ||||
|         self.u1.check() | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         max_dt = self.handler.array_min(self.stream) | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
| @ -26,10 +26,10 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.simulator import BaseSimulator, BoundaryCondition, conversion | ||||
| from GPUSimulators.gpu import GPUHandler | ||||
| 
 | ||||
| 
 | ||||
| class KP07(BaseSimulator): | ||||
| @ -93,8 +93,7 @@ class KP07(BaseSimulator): | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07Kernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii") | ||||
|         self.handler = GPUHandler(context, module, "KP07Kernel", "iifffffiiPiPiPiPiPiPiPiiii", self.grid_size) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
| @ -105,7 +104,6 @@ class KP07(BaseSimulator): | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
| @ -114,28 +112,28 @@ class KP07(BaseSimulator): | ||||
|         else: | ||||
|             self.dt = dt | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|         self.handler.array_fill(self.dt, self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.substep_rk(dt, step_number) | ||||
| 
 | ||||
|     def substep_rk(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|         self.handler.prepared_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         [self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.theta, | ||||
|                                         conversion.step_order_to_coded_int(step=substep, order=self.order), | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                         self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                         self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                         self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                         self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                         self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                         self.handler.cfl_data, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|                                         self.nx, self.ny]) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def get_output(self): | ||||
| @ -146,5 +144,5 @@ class KP07(BaseSimulator): | ||||
|         self.u1.check() | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         max_dt = self.handler.array_min(self.stream) | ||||
|         return max_dt * 0.5 ** (self.order - 1) | ||||
|  | ||||
| @ -26,10 +26,10 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.simulator import BaseSimulator, BoundaryCondition | ||||
| from GPUSimulators.gpu import GPUHandler | ||||
| 
 | ||||
| 
 | ||||
| class KP07Dimsplit(BaseSimulator): | ||||
| @ -93,8 +93,7 @@ class KP07Dimsplit(BaseSimulator): | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07DimsplitKernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii") | ||||
|         self.handler = GPUHandler(context, module, "KP07DimsplitKernel", "iifffffiiPiPiPiPiPiPiPiiii", self.grid_size) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
| @ -105,7 +104,6 @@ class KP07Dimsplit(BaseSimulator): | ||||
|                              nx, ny, | ||||
|                              self.gc_x, self.gc_y, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
| @ -114,28 +112,28 @@ class KP07Dimsplit(BaseSimulator): | ||||
|         else: | ||||
|             self.dt = dt | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|         self.handler.array_fill(self.dt, self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.substep_dimsplit(dt * 0.5, step_number) | ||||
| 
 | ||||
|     def substep_dimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|         self.handler.prepared_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         [self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.theta, | ||||
|                                         substep, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                         self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                         self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                         self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                         self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                         self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                         self.handler.cfl_data, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|                                         self.nx, self.ny]) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def get_output(self): | ||||
| @ -146,5 +144,5 @@ class KP07Dimsplit(BaseSimulator): | ||||
|         self.u1.check() | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         max_dt = self.handler.array_min(self.stream) | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
| @ -22,10 +22,10 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.simulator import BaseSimulator, BoundaryCondition | ||||
| from GPUSimulators.gpu import GPUHandler | ||||
| 
 | ||||
| 
 | ||||
| class LxF(BaseSimulator): | ||||
| @ -85,8 +85,7 @@ class LxF(BaseSimulator): | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("LxFKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii") | ||||
|         self.handler = GPUHandler(context, module, "LxFKernel", "iiffffiPiPiPiPiPiPiPiiii", self.grid_size) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
| @ -97,7 +96,6 @@ class LxF(BaseSimulator): | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
| @ -106,7 +104,7 @@ class LxF(BaseSimulator): | ||||
|         else: | ||||
|             self.dt = dt | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|         self.handler.array_fill(self.dt, self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         """ | ||||
| @ -114,20 +112,20 @@ class LxF(BaseSimulator): | ||||
|             dt: Size of each timestep (seconds) | ||||
|         """ | ||||
| 
 | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|         self.handler.prepared_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         [self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                         self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                         self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                         self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                         self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                         self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                         self.handler.cfl_data, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|                                         self.nx, self.ny]) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def get_output(self): | ||||
| @ -138,5 +136,5 @@ class LxF(BaseSimulator): | ||||
|         self.u1.check() | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         max_dt = self.handler.array_min(self.stream) | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
| @ -22,10 +22,10 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.simulator import BaseSimulator, BoundaryCondition | ||||
| from GPUSimulators.gpu import GPUHandler | ||||
| 
 | ||||
| 
 | ||||
| class WAF(BaseSimulator): | ||||
| @ -85,8 +85,7 @@ class WAF(BaseSimulator): | ||||
|                                         'hip': compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("WAFKernel") | ||||
|         self.kernel.prepare("iiffffiiPiPiPiPiPiPiPiiii") | ||||
|         self.handler = GPUHandler(context, module, "WAFKernel", "iiffffiiPiPiPiPiPiPiPiiii", self.grid_size) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
| @ -97,7 +96,6 @@ class WAF(BaseSimulator): | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
| @ -106,27 +104,27 @@ class WAF(BaseSimulator): | ||||
|         else: | ||||
|             self.dt = dt | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|         self.handler.array_fill(self.dt, self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.substep_dimsplit(dt * 0.5, step_number) | ||||
| 
 | ||||
|     def substep_dimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|         self.handler.prepared_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         [self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         substep, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         self.u0[0].data, self.u0[0].get_strides()[0], | ||||
|                                         self.u0[1].data, self.u0[1].get_strides()[0], | ||||
|                                         self.u0[2].data, self.u0[2].get_strides()[0], | ||||
|                                         self.u1[0].data, self.u1[0].get_strides()[0], | ||||
|                                         self.u1[1].data, self.u1[1].get_strides()[0], | ||||
|                                         self.u1[2].data, self.u1[2].get_strides()[0], | ||||
|                                         self.handler.cfl_data, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|                                         self.nx, self.ny]) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def get_output(self): | ||||
| @ -137,5 +135,5 @@ class WAF(BaseSimulator): | ||||
|         self.u1.check() | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         max_dt = self.handler.array_min(self.stream) | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
							
								
								
									
										498
									
								
								HIPTestSchemes.ipynb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										498
									
								
								HIPTestSchemes.ipynb
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -13,6 +13,11 @@ dependencies: | ||||
| - pytools | ||||
| - netcdf4 | ||||
| - scipy | ||||
| - matplotlib | ||||
| - ipyparallel | ||||
| - jupyter | ||||
| - nb_conda_kernels | ||||
| - line_profiler | ||||
| - tqdm | ||||
| - pip: | ||||
|   - hip-python==6.4.1.552.39 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Anthony Berg
						Anthony Berg