feat(common): make subpackage for arrays to split hip and cuda

Anthony Berg 2025-06-24 21:11:19 +02:00
parent bfed972046
commit d1df00267a
10 changed files with 145 additions and 85 deletions
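The per-file paths were not captured in this view; the sketch below reconstructs the likely layout purely from the import statements in the hunks that follow (an assumption, not part of the commit):

# Inferred layout (assumption based on the import paths below, not verbatim from the commit):
# GPUSimulators/common/arrays/__init__.py        selects a backend via the GPU_LANG environment variable
# GPUSimulators/common/arrays/arkawa2d.py        BaseArakawaA2D
# GPUSimulators/common/arrays/array2d.py         BaseArray2D
# GPUSimulators/common/arrays/array3d.py         BaseArray3D
# GPUSimulators/common/arrays/cuda/__init__.py   re-exports ArakawaA2D, Array2D, Array3D
# GPUSimulators/common/arrays/cuda/arkawa2d.py   ArakawaA2D (CUDA-specific check())
# GPUSimulators/common/arrays/cuda/array2d.py    CudaArray2D
# GPUSimulators/common/arrays/cuda/array3d.py    CudaArray3D
# GPUSimulators/common/arrays/hip/               HIP counterparts referenced by `from .hip import *` (not shown here)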

View File

@@ -1,7 +1,7 @@
from .arkawa_2d import ArakawaA2D
from GPUSimulators.common.arrays.cuda.arkawa2d import ArakawaA2D
from .common import *
from .cuda_array_2d import CudaArray2D
from .cuda_array_3d import CudaArray3D
from GPUSimulators.common.arrays.cuda.array2d import CudaArray2D
from GPUSimulators.common.arrays.cuda.array3d import CudaArray3D
from .data_dumper import DataDumper
from .ip_engine import IPEngine
from .popen_file_buffer import PopenFileBuffer

View File

@@ -0,0 +1,11 @@
from os import environ
# TODO this is temporary, remove
from .cuda import *
__env_name = 'GPU_LANG'
if __env_name in environ and environ.get(__env_name).lower() == "cuda":
    from .cuda import *
else:
    from .hip import *
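A hedged usage sketch of the dispatch above: the backend is chosen once, at first import of the package, from the GPU_LANG environment variable (the HIP side is referenced but not shown in this commit):

import os

# Must be set before GPUSimulators.common.arrays is imported for the first time;
# anything other than "cuda" (or an unset variable) falls through to the HIP backend.
os.environ["GPU_LANG"] = "cuda"

from GPUSimulators.common.arrays import ArakawaA2D, Array2D, Array3D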

View File

@@ -1,14 +1,11 @@
import logging
import numpy as np
import pycuda.gpuarray
from GPUSimulators.common.cuda_array_2d import CudaArray2D
from GPUSimulators.common.arrays import Array2D
class ArakawaA2D:
class BaseArakawaA2D(object):
"""
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
A base class to be used to represent an Arakawa A type (unstaggered, logically Cartesian) grid.
"""
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
@@ -17,8 +14,9 @@ class ArakawaA2D:
"""
self.logger = logging.getLogger(__name__)
self.gpu_variables = []
for cpu_variable in cpu_variables:
self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]
self.gpu_variables += [Array2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]
def __getitem__(self, key):
if type(key) != int:
@@ -43,15 +41,3 @@ class ArakawaA2D:
        # stream.synchronize()
        return cpu_variables

    def check(self):
        """
        Checks that data is still sane
        """
        for i, gpu_variable in enumerate(self.gpu_variables):
            var_sum = pycuda.gpuarray.sum(gpu_variable.data).get()
            self.logger.debug(f"Data {i} with size [{gpu_variable.nx} x {gpu_variable.ny}] "
                              + f"has average {var_sum / (gpu_variable.nx * gpu_variable.ny)}")

            if np.isnan(var_sum):
                raise ValueError("Data contains NaN values!")

View File

@@ -0,0 +1,39 @@
import logging
import numpy as np
class BaseArray2D(object):
"""
A base class that holds 2D data. To be used depending on the GPGPU language.
"""
def __init__(self, nx, ny, x_halo, y_halo, cpu_data=None, ):
"""
Uploads initial data to the CUDA device
"""
        self.logger = logging.getLogger(__name__)

        self.nx = nx
        self.ny = ny
        self.x_halo = x_halo
        self.y_halo = y_halo

        self.nx_halo = nx + 2 * x_halo
        self.ny_halo = ny + 2 * y_halo

        # If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        # Make sure data is in proper format
        if cpu_data.shape != (self.ny_halo, self.nx_halo) and cpu_data.shape != (self.ny, self.nx):
            raise ValueError(
                f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.ny, self.nx))} / "
                + f"{str((self.ny_halo, self.nx_halo))}")

        if cpu_data.itemsize != 4:
            raise ValueError("Wrong size of data type")

        if np.isfortran(cpu_data):
            raise TypeError("Wrong datatype (Fortran, expected C)")
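A minimal sketch of the validation this base class performs, assuming the inferred module path GPUSimulators.common.arrays.array2d; float32, C-ordered data of either interior or halo-padded shape is accepted:

import numpy as np
from GPUSimulators.common.arrays.array2d import BaseArray2D  # inferred module path

nx, ny, halo = 16, 8, 2
interior = np.zeros((ny, nx), dtype=np.float32)                      # shape (ny, nx)
padded = np.zeros((ny + 2 * halo, nx + 2 * halo), dtype=np.float32)  # shape (ny_halo, nx_halo)

BaseArray2D(nx, ny, halo, halo, interior)   # accepted
BaseArray2D(nx, ny, halo, halo, padded)     # accepted

try:
    BaseArray2D(nx, ny, halo, halo, interior.astype(np.float64))     # itemsize 8, not 4
except ValueError as e:
    print(e)  # "Wrong size of data type"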

View File

@@ -0,0 +1,41 @@
import logging
import numpy as np
class BaseArray3D(object):
"""
A base class that holds 3D data. To be used depending on the GPGPU language.
"""
def __init__(self, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None):
"""
Uploads initial data to the CL device
"""
        self.logger = logging.getLogger(__name__)

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.x_halo = x_halo
        self.y_halo = y_halo
        self.z_halo = z_halo

        self.nx_halo = nx + 2 * x_halo
        self.ny_halo = ny + 2 * y_halo
        self.nz_halo = nz + 2 * z_halo

        # If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        # Make sure data is in proper format
        if (cpu_data.shape != (self.nz_halo, self.ny_halo, self.nx_halo)
                and cpu_data.shape != (self.nz, self.ny, self.nx)):
            raise ValueError(f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.nz, self.ny, self.nx))} / "
                             + f"{str((self.nz_halo, self.ny_halo, self.nx_halo))}")

        if cpu_data.itemsize != 4:
            raise ValueError("Wrong size of data type")

        if np.isfortran(cpu_data):
            raise TypeError("Wrong datatype (Fortran, expected C)")
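The 3D variant applies the same checks; for example (same assumed import path convention), Fortran-ordered input is rejected:

import numpy as np
from GPUSimulators.common.arrays.array3d import BaseArray3D  # inferred module path

nx, ny, nz = 16, 8, 4
c_ordered = np.zeros((nz, ny, nx), dtype=np.float32)
BaseArray3D(nx, ny, nz, 1, 1, 1, c_ordered)                  # accepted: C order, float32

try:
    BaseArray3D(nx, ny, nz, 1, 1, 1, np.asfortranarray(c_ordered))
except TypeError as e:
    print(e)  # "Wrong datatype (Fortran, expected C)"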

View File

@@ -0,0 +1,3 @@
from .arkawa2d import ArakawaA2D
from .array2d import CudaArray2D as Array2D
from .array3d import CudaArray3D as Array3D

View File

@@ -0,0 +1,28 @@
import numpy as np
import pycuda.gpuarray
from GPUSimulators.common.arrays.arkawa2d import BaseArakawaA2D
class ArakawaA2D(BaseArakawaA2D):
"""
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
"""
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
"""
Uploads initial data to the GPU device
"""
super().__init__(stream, nx, ny, halo_x, halo_y, cpu_variables)
def check(self):
"""
Checks that data is still sane
"""
for i, gpu_variable in enumerate(self.gpu_variables):
var_sum = pycuda.gpuarray.sum(gpu_variable.data).get()
self.logger.debug(f"Data {i} with size [{gpu_variable.nx} x {gpu_variable.ny}] "
+ f"has average {var_sum / (gpu_variable.nx * gpu_variable.ny)}")
if np.isnan(var_sum):
raise ValueError("Data contains NaN values!")
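A hedged end-to-end sketch of the CUDA subclass, assuming a working PyCUDA installation: the constructor wraps each CPU variable in an Array2D, and check() sums every variable on the device to flag NaNs:

import numpy as np
import pycuda.autoinit  # creates a CUDA context on the default device (noqa: F401)
import pycuda.driver as cuda
from GPUSimulators.common.arrays.cuda import ArakawaA2D

stream = cuda.Stream()
h = np.ones((8, 16), dtype=np.float32)  # one conserved variable with shape (ny, nx)

grid = ArakawaA2D(stream, nx=16, ny=8, halo_x=2, halo_y=2, cpu_variables=[h])
gpu_h = grid[0]   # __getitem__ returns the underlying Array2D
grid.check()      # logs each variable's average; raises ValueError if a sum is NaN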

View File

@@ -1,13 +1,13 @@
import logging
import numpy as np
import pycuda.gpuarray
import pycuda.driver as cuda
from pycuda.tools import PageLockedMemoryPool
from GPUSimulators.common.arrays.array2d import BaseArray2D
class CudaArray2D:
class CudaArray2D(BaseArray2D):
"""
Class that holds 2D CUDA data
"""
@@ -17,40 +17,17 @@ class CudaArray2D:
        Uploads initial data to the CUDA device
        """
        self.logger = logging.getLogger(__name__)

        self.nx = nx
        self.ny = ny
        self.x_halo = x_halo
        self.y_halo = y_halo

        nx_halo = nx + 2 * x_halo
        ny_halo = ny + 2 * y_halo
        super().__init__(nx, ny, x_halo, y_halo, cpu_data)

        # self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
        # Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
        self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype)
        self.data = pycuda.gpuarray.zeros((self.ny_halo, self.nx_halo), dtype)

        # For returning to download
        self.memorypool = PageLockedMemoryPool()

        # If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        # Make sure data is in proper format
        if cpu_data.shape != (ny_halo, nx_halo) and cpu_data.shape != (self.ny, self.nx):
            raise ValueError(
                f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.ny, self.nx))} / {str((ny_halo, nx_halo))}")
        if cpu_data.itemsize != 4:
            raise ValueError("Wrong size of data type")
        if np.isfortran(cpu_data):
            raise TypeError("Wrong datatype (Fortran, expected C)")

        # Create a copy object from host to device
        x = (nx_halo - cpu_data.shape[1]) // 2
        y = (ny_halo - cpu_data.shape[0]) // 2
        x = (self.nx_halo - cpu_data.shape[1]) // 2
        y = (self.ny_halo - cpu_data.shape[0]) // 2
        self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]])
        # self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)

View File

@@ -1,12 +1,12 @@
import logging
import numpy as np
import pycuda.gpuarray
import pycuda.driver as cuda
from pycuda.tools import PageLockedMemoryPool
from GPUSimulators.common.arrays.array3d import BaseArray3D
class CudaArray3D:
class CudaArray3D(BaseArray3D):
"""
Class that holds 3D data
"""
@@ -16,49 +16,24 @@ class CudaArray3D:
        Uploads initial data to the CL device
        """
        self.logger = logging.getLogger(__name__)

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.x_halo = x_halo
        self.y_halo = y_halo
        self.z_halo = z_halo

        nx_halo = nx + 2 * x_halo
        ny_halo = ny + 2 * y_halo
        nz_halo = nz + 2 * z_halo
        super().__init__(nx, ny, nz, x_halo, y_halo, z_halo, cpu_data)

        # self.logger.debug("Allocating [%dx%dx%d] buffer", self.nx, self.ny, self.nz)
        # Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
        self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype)
        self.data = pycuda.gpuarray.zeros((self.nz_halo, self.ny_halo, self.nx_halo), dtype)

        # For returning to download
        self.memorypool = PageLockedMemoryPool()

        # If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        # Make sure data is in proper format
        if (cpu_data.shape != (nz_halo, ny_halo, nx_halo)
                and cpu_data.shape != (self.nz, self.ny, self.nx)):
            raise ValueError(f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.nz, self.ny, self.nx))} / {str((nz_halo, ny_halo, nx_halo))}")
        if cpu_data.itemsize != 4:
            raise ValueError("Wrong size of data type")
        if np.isfortran(cpu_data):
            raise TypeError("Wrong datatype (Fortran, expected C)")

        # Create a copy object from host to device
        copy = cuda.Memcpy3D()
        copy.set_src_host(cpu_data)
        copy.set_dst_device(self.data.gpudata)

        # Set offsets of destination
        x_offset = (nx_halo - cpu_data.shape[2]) // 2
        y_offset = (ny_halo - cpu_data.shape[1]) // 2
        z_offset = (nz_halo - cpu_data.shape[0]) // 2
        x_offset = (self.nx_halo - cpu_data.shape[2]) // 2
        y_offset = (self.ny_halo - cpu_data.shape[1]) // 2
        z_offset = (self.nz_halo - cpu_data.shape[0]) // 2
        copy.dst_x_in_bytes = x_offset * self.data.strides[1]
        copy.dst_y = y_offset
        copy.dst_z = z_offset