Compare commits

...

7 Commits

Author SHA1 Message Date
Anthony Berg
bf2681fbf0 debug: add shorter stop time to investigate NaNs 2025-09-03 18:33:05 +02:00
Anthony Berg
a1a653e6d8 feat: add more save times to mpi testing 2025-09-03 18:10:25 +02:00
Anthony Berg
87474dcb20 feat(array): improve checking the array for NaNs 2025-09-03 18:08:45 +02:00
Anthony Berg
26c0eab7c8 fix(sim): add more debugging logs in run_simulation 2025-09-03 18:07:48 +02:00
Anthony Berg
833f5bf997 fix(gpu): typing for dictionary in parameter 2025-09-03 18:07:13 +02:00
Anthony Berg
cd89a343bf feat(sim): increase default tolerance for end time in simulate 2025-09-03 18:06:43 +02:00
Anthony Berg
97c2fd47e3 fix(gpu): correct syntax for dictionary typing 2025-09-03 18:04:43 +02:00
9 changed files with 64 additions and 50 deletions

View File

@ -1,4 +1,5 @@
import ctypes
from typing import Union
import numpy as np
from hip import hip, hipblas
@ -13,12 +14,28 @@ class HIPArakawaA2D(BaseArakawaA2D):
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
"""
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
def __init__(self, stream: hip.ihipStream_t, nx: int, ny: int, halo_x: int, halo_y: int, cpu_variables: list[Union[np.ndarray, None]]):
"""
Uploads initial data to the GPU device
"""
super().__init__(stream, nx, ny, halo_x, halo_y, cpu_variables, HIPArray2D)
# Variables for ``__sum_array``
# TODO should have a way of not hardcoding the dtype
dtype = np.float32
self.__result_h = np.zeros(1, dtype=dtype)
self.__num_bytes = self.__result_h.itemsize
self.__result_d = hip_check(hip.hipMalloc(self.__num_bytes))
self.__total_sum_d = hip_check(hip.hipMalloc(self.__num_bytes))
self.__handle = hip_check(hipblas.hipblasCreate())
def __del__(self):
# Cleanup GPU variables in ``__sum_array``
hip_check(hipblas.hipblasDestroy(self.__handle))
hip_check(hip.hipFree(self.__result_d))
hip_check(hip.hipFree(self.__total_sum_d))
def check(self):
"""
Checks that data is still sane
@ -31,8 +48,7 @@ class HIPArakawaA2D(BaseArakawaA2D):
if np.isnan(var_sum):
raise ValueError("Data contains NaN values!")
@staticmethod
def __sum_array(array: HIPArray2D) -> np.ndarray[tuple[int]]:
def __sum_array(self, array: HIPArray2D) -> np.ndarray[tuple[int]]:
"""
Sum all the elements in HIPArray2D using hipblas.
Args:
@ -40,35 +56,22 @@ class HIPArakawaA2D(BaseArakawaA2D):
Returns:
The sum of all the elements in ``array``.
"""
dtype = array.dtype
result_h = np.zeros(1, dtype=dtype)
num_bytes = dtype.itemsize
result_d = hip_check(hip.hipMalloc(num_bytes))
# Sum the ``data_h`` array using hipblas
handle = hip_check(hipblas.hipblasCreate())
# Using pitched memory, so we need to sum row by row
total_sum_d = hip_check(hip.hipMalloc(num_bytes))
hip_check(hip.hipMemset(total_sum_d, 0, num_bytes))
hip_check(hip.hipMemset(self.__total_sum_d, 0, self.__num_bytes))
width, height = array.shape
for y in range(height):
row_ptr = int(array.data) + y * array.pitch_d
hip_check(hipblas.hipblasSasum(handle, width, row_ptr, 1, result_d))
hip_check(hipblas.hipblasSasum(self.__handle, width, row_ptr, 1, self.__result_d))
hip_check(hipblas.hipblasSaxpy(handle, 1, ctypes.c_float(1.0), result_d, 1, total_sum_d, 1))
hip_check(hip.hipMemcpy(result_h, total_sum_d, num_bytes, hip.hipMemcpyKind.hipMemcpyDeviceToHost))
hip_check(
hipblas.hipblasSaxpy(self.__handle, 1, ctypes.c_float(1.0), self.__result_d, 1, self.__total_sum_d, 1))
# Copy over the result from the device
hip_check(hip.hipMemcpy(result_h, total_sum_d, num_bytes, hip.hipMemcpyKind.hipMemcpyDeviceToHost))
hip_check(hip.hipMemcpy(self.__result_h, self.__total_sum_d, self.__num_bytes,
hip.hipMemcpyKind.hipMemcpyDeviceToHost))
# Cleanup
hip_check(hipblas.hipblasDestroy(handle))
hip_check(hip.hipFree(result_d))
hip_check(hip.hipFree(total_sum_d))
return result_h
return self.__result_h

View File

@ -185,6 +185,7 @@ def run_simulation(simulator, simulator_args, outfile, save_times, save_var_name
with tqdm(total=save_times[-1], desc="Simulation progress", unit="sim s", disable=not progress_bar) as pbar:
# Start simulation loop
for save_step, t_step in enumerate(t_steps):
logger.debug(f"Starting step: {save_step}.")
t_end = save_step
# Sanity check simulator
@ -197,8 +198,10 @@ def run_simulation(simulator, simulator_args, outfile, save_times, save_var_name
profiling_data_sim_runner["start"]["t_full_step"] += time.time()
# Simulate
logger.debug(f"Simulating for {t_step} s.")
if t_step > 0.0:
sim.simulate(t_step, dt, pbar=pbar)
logger.debug(f"Completed simulation of {t_step} s.")
profiling_data_sim_runner["end"]["t_full_step"] += time.time()
@ -208,8 +211,11 @@ def run_simulation(simulator, simulator_args, outfile, save_times, save_var_name
save_vars = sim.download(download_vars)
# Save to file
logger.debug(f"Saving step [{save_step}] to netCDF.")
for i, var_name in enumerate(save_var_names):
logger.debug(f"Saving {var_name} ({grid_x0}:{grid_x1}, {grid_y0}:{grid_y1}) to netCDF.")
ncvars[var_name][save_step, grid_y0:grid_y1, grid_x0:grid_x1] = save_vars[i]
logger.debug(f"Saved step [{save_step}].")
profiling_data_sim_runner["end"]["t_nc_write"] += time.time()

View File

@ -110,7 +110,7 @@ class CudaContext(Context):
def get_module(self, kernel_filename: str,
function: str,
include_dirs: dict = None,
defines: dict[str: dict] = None,
defines: dict[str, dict] = None,
compile_args: dict = None,
jit_compile_args: dict = None) -> cuda.Module:
"""

View File

@ -64,8 +64,8 @@ class HIPContext(Context):
def get_module(self, kernel_filename: str,
function: str,
include_dirs: list[str] = None,
defines: dict[str: int] = None,
compile_args: dict[str: list] = None,
defines: dict[str, int] = None,
compile_args: dict[str, list] = None,
jit_compile_args: dict = None):
"""
Reads a ``.hip`` file and creates a HIP kernel from that.

View File

@ -21,19 +21,22 @@ class HIPHandler(BaseGPUHandler):
self.num_bytes = self.cfl_data_h.size * self.cfl_data_h.itemsize
self.cfl_data = hip_check(hip.hipMalloc(self.num_bytes)).configure(
typestr=np.finfo(self.dtype).dtype.name, shape=grid_size
typestr=self.cfl_data_h.dtype.str, shape=grid_size
)
def __del__(self):
hip_check(hip.hipFree(self.cfl_data))
def prepared_call(self, grid_size, block_size, stream, args):
if len(grid_size) < 3:
grid_size = (*grid_size, 1)
def prepared_call(self, grid_size: tuple[int, int], block_size: tuple[int, int, int], stream: hip.ihipStream_t,
args: list):
grid = hip.dim3(*grid_size)
block = hip.dim3(*block_size)
for i in range(len(args)):
val = args[i]
if isinstance(val, int) or isinstance(val, np.int32):
if isinstance(val, np.int64):
args[i] = ctypes.c_int64(val)
elif isinstance(val, int) or isinstance(val, np.int32):
args[i] = ctypes.c_int(val)
elif isinstance(val, float) or isinstance(val, np.float32):
args[i] = ctypes.c_float(val)
@ -42,29 +45,31 @@ class HIPHandler(BaseGPUHandler):
hip_check(hip.hipModuleLaunchKernel(
self.kernel,
*grid_size,
*block_size,
0,
stream,
None,
args
*grid,
*block,
sharedMemBytes=0,
stream=stream,
kernelParams=None,
extra=args
))
def array_fill(self, data, stream):
def array_fill(self, data: float, stream: hip.ihipStream_t):
self.cfl_data_h.fill(data)
hip_check(
hip.hipMemcpyAsync(self.cfl_data, self.cfl_data_h, self.num_bytes, hip.hipMemcpyKind.hipMemcpyHostToDevice,
stream))
def array_min(self, stream):
def array_min(self, stream: hip.ihipStream_t) -> float:
handle = hip_check(hipblas.hipblasCreate())
value = np.empty(1, self.dtype)
hip_check(hipblas.hipblasIsamin(handle, self.cfl_data.size, self.cfl_data, 1, value))
value_h = np.empty(1, self.dtype)
value_d = hip_check(hip.hipMalloc(value_h.itemsize))
hip_check(hipblas.hipblasIsamin(handle, self.cfl_data.size, self.cfl_data, 1, value_d))
hip_check(hipblas.hipblasDestroy(handle))
hip_check(hip.hipMemcpy(value, self.cfl_data, self.cfl_data_h.itemsize, hip.hipMemcpyKind.hipMemcpyDeviceToHost))
hip_check(
hip.hipMemcpy(value_h, self.cfl_data, self.cfl_data_h.itemsize, hip.hipMemcpyKind.hipMemcpyDeviceToHost))
return value[0]

View File

@ -80,16 +80,16 @@ class BaseMPISimulator(BaseSimulator):
})
gi, gj = grid.get_coordinate()
# print("gi: " + str(gi) + ", gj: " + str(gj))
if gi == 0 and boundary_conditions.west != BoundaryCondition.Type.Periodic:
if (gi == 0 and boundary_conditions.west != BoundaryCondition.Type.Periodic):
self.west = None
new_boundary_conditions.west = boundary_conditions.west
if gj == 0 and boundary_conditions.south != BoundaryCondition.Type.Periodic:
if (gj == 0 and boundary_conditions.south != BoundaryCondition.Type.Periodic):
self.south = None
new_boundary_conditions.south = boundary_conditions.south
if gi == grid.x - 1 and boundary_conditions.east != BoundaryCondition.Type.Periodic:
if (gi == grid.x - 1 and boundary_conditions.east != BoundaryCondition.Type.Periodic):
self.east = None
new_boundary_conditions.east = boundary_conditions.east
if gj == grid.y - 1 and boundary_conditions.north != BoundaryCondition.Type.Periodic:
if (gj == grid.y - 1 and boundary_conditions.north != BoundaryCondition.Type.Periodic):
self.north = None
new_boundary_conditions.north = boundary_conditions.north
sim.set_boundary_conditions(new_boundary_conditions)

View File

@ -42,7 +42,7 @@ class BoundaryCondition(object):
Periodic = 2,
Reflective = 3
def __init__(self, types: dict[str: Type.Reflective]=None):
def __init__(self, types: dict[str, Type]=None):
"""
Constructor
"""

View File

@ -104,7 +104,7 @@ class BaseSimulator(object):
self.dt = dt
if tolerance is None:
tolerance = 0.000000001
tolerance = 0.00001
while self.sim_time() < t_end:
# Prevent an infinite loop from occurring from tiny numbers

View File

@ -116,7 +116,7 @@ gamma = 1.4
# save_times = np.linspace(0, 0.000009, 2)
# save_times = np.linspace(0, 0.000099, 11)
# save_times = np.linspace(0, 0.000099, 2)
save_times = np.linspace(0, 0.1, 5)
save_times = np.linspace(0, 0.1, 21)
outfile = "mpi_out.nc4"
save_var_names = ['rho', 'rho_u', 'rho_v', 'E']