feat(common): make subpackage for arrays to split hip and cuda

Anthony Berg 2025-06-24 21:11:19 +02:00
parent bfed972046
commit d1df00267a
10 changed files with 145 additions and 85 deletions
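The per-file paths were not captured in this view; the sketch below reconstructs the likely layout purely from the import statements in the hunks that follow (an assumption, not part of the commit):

# Inferred layout (assumption based on the import paths below, not verbatim from the commit):
# GPUSimulators/common/arrays/__init__.py        selects a backend via the GPU_LANG environment variable
# GPUSimulators/common/arrays/arkawa2d.py        BaseArakawaA2D
# GPUSimulators/common/arrays/array2d.py         BaseArray2D
# GPUSimulators/common/arrays/array3d.py         BaseArray3D
# GPUSimulators/common/arrays/cuda/__init__.py   re-exports ArakawaA2D, Array2D, Array3D
# GPUSimulators/common/arrays/cuda/arkawa2d.py   ArakawaA2D (CUDA-specific check())
# GPUSimulators/common/arrays/cuda/array2d.py    CudaArray2D
# GPUSimulators/common/arrays/cuda/array3d.py    CudaArray3D
# GPUSimulators/common/arrays/hip/               HIP counterparts referenced by `from .hip import *` (not shown here)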

View File

@@ -1,7 +1,7 @@
from .arkawa_2d import ArakawaA2D
from GPUSimulators.common.arrays.cuda.arkawa2d import ArakawaA2D
from .common import *
from .cuda_array_2d import CudaArray2D
from .cuda_array_3d import CudaArray3D
from GPUSimulators.common.arrays.cuda.array2d import CudaArray2D
from GPUSimulators.common.arrays.cuda.array3d import CudaArray3D
from .data_dumper import DataDumper
from .ip_engine import IPEngine
from .popen_file_buffer import PopenFileBuffer

View File

@@ -0,0 +1,11 @@
from os import environ
# TODO this is temporary, remove
from .cuda import *
__env_name = 'GPU_LANG'
if __env_name in environ and environ.get(__env_name).lower() == "cuda":
    from .cuda import *
else:
    from .hip import *
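A hedged usage sketch of the dispatch above: the backend is chosen once, at first import of the package, from the GPU_LANG environment variable (the HIP side is referenced but not shown in this commit):

import os

# Must be set before GPUSimulators.common.arrays is imported for the first time;
# anything other than "cuda" (or an unset variable) falls through to the HIP backend.
os.environ["GPU_LANG"] = "cuda"

from GPUSimulators.common.arrays import ArakawaA2D, Array2D, Array3D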

View File

@@ -1,14 +1,11 @@
import logging
import numpy as np
import pycuda.gpuarray
from GPUSimulators.common.cuda_array_2d import CudaArray2D
from GPUSimulators.common.arrays import Array2D
class ArakawaA2D:
class BaseArakawaA2D(object):
"""
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
A base class to be used to represent an Arakawa A type (unstaggered, logically Cartesian) grid.
"""
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
@@ -17,8 +14,9 @@ class ArakawaA2D:
"""
self.logger = logging.getLogger(__name__)
self.gpu_variables = []
for cpu_variable in cpu_variables:
self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]
self.gpu_variables += [Array2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]
def __getitem__(self, key):
if type(key) != int:
@@ -43,15 +41,3 @@ class ArakawaA2D:
        # stream.synchronize()
        return cpu_variables

    def check(self):
        """
        Checks that data is still sane
        """
        for i, gpu_variable in enumerate(self.gpu_variables):
            var_sum = pycuda.gpuarray.sum(gpu_variable.data).get()
            self.logger.debug(f"Data {i} with size [{gpu_variable.nx} x {gpu_variable.ny}] "
                              + f"has average {var_sum / (gpu_variable.nx * gpu_variable.ny)}")

            if np.isnan(var_sum):
                raise ValueError("Data contains NaN values!")

View File

@@ -0,0 +1,39 @@
import logging
import numpy as np
class BaseArray2D(object):
"""
A base class that holds 2D data. To be used depending on the GPGPU language.
"""
def __init__(self, nx, ny, x_halo, y_halo, cpu_data=None, ):
"""
Uploads initial data to the CUDA device
"""
        self.logger = logging.getLogger(__name__)

        self.nx = nx
        self.ny = ny
        self.x_halo = x_halo
        self.y_halo = y_halo

        self.nx_halo = nx + 2 * x_halo
        self.ny_halo = ny + 2 * y_halo

        # If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        # Make sure data is in proper format
        if cpu_data.shape != (self.ny_halo, self.nx_halo) and cpu_data.shape != (self.ny, self.nx):
            raise ValueError(
                f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.ny, self.nx))} / "
                + f"{str((self.ny_halo, self.nx_halo))}")

        if cpu_data.itemsize != 4:
            raise ValueError("Wrong size of data type")

        if np.isfortran(cpu_data):
            raise TypeError("Wrong datatype (Fortran, expected C)")
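A minimal sketch of the validation this base class performs, assuming the inferred module path GPUSimulators.common.arrays.array2d; float32, C-ordered data of either interior or halo-padded shape is accepted:

import numpy as np
from GPUSimulators.common.arrays.array2d import BaseArray2D  # inferred module path

nx, ny, halo = 16, 8, 2
interior = np.zeros((ny, nx), dtype=np.float32)                      # shape (ny, nx)
padded = np.zeros((ny + 2 * halo, nx + 2 * halo), dtype=np.float32)  # shape (ny_halo, nx_halo)

BaseArray2D(nx, ny, halo, halo, interior)   # accepted
BaseArray2D(nx, ny, halo, halo, padded)     # accepted

try:
    BaseArray2D(nx, ny, halo, halo, interior.astype(np.float64))     # itemsize 8, not 4
except ValueError as e:
    print(e)  # "Wrong size of data type"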

View File

@@ -0,0 +1,41 @@
import logging
import numpy as np
class BaseArray3D(object):
"""
A base class that holds 3D data. To be used depending on the GPGPU language.
"""
def __init__(self, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None):
"""
Uploads initial data to the CL device
"""
        self.logger = logging.getLogger(__name__)

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.x_halo = x_halo
        self.y_halo = y_halo
        self.z_halo = z_halo

        self.nx_halo = nx + 2 * x_halo
        self.ny_halo = ny + 2 * y_halo
        self.nz_halo = nz + 2 * z_halo

        # If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        # Make sure data is in proper format
        if (cpu_data.shape != (self.nz_halo, self.ny_halo, self.nx_halo)
                and cpu_data.shape != (self.nz, self.ny, self.nx)):
            raise ValueError(f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.nz, self.ny, self.nx))} / "
                             + f"{str((self.nz_halo, self.ny_halo, self.nx_halo))}")

        if cpu_data.itemsize != 4:
            raise ValueError("Wrong size of data type")

        if np.isfortran(cpu_data):
            raise TypeError("Wrong datatype (Fortran, expected C)")
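The 3D variant applies the same checks; for example (same assumed import path convention), Fortran-ordered input is rejected:

import numpy as np
from GPUSimulators.common.arrays.array3d import BaseArray3D  # inferred module path

nx, ny, nz = 16, 8, 4
c_ordered = np.zeros((nz, ny, nx), dtype=np.float32)
BaseArray3D(nx, ny, nz, 1, 1, 1, c_ordered)                  # accepted: C order, float32

try:
    BaseArray3D(nx, ny, nz, 1, 1, 1, np.asfortranarray(c_ordered))
except TypeError as e:
    print(e)  # "Wrong datatype (Fortran, expected C)"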

View File

@@ -0,0 +1,3 @@
from .arkawa2d import ArakawaA2D
from .array2d import CudaArray2D as Array2D
from .array3d import CudaArray3D as Array3D

View File

@@ -0,0 +1,28 @@
import numpy as np
import pycuda.gpuarray
from GPUSimulators.common.arrays.arkawa2d import BaseArakawaA2D
class ArakawaA2D(BaseArakawaA2D):
"""
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
"""
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
"""
Uploads initial data to the GPU device
"""
super().__init__(stream, nx, ny, halo_x, halo_y, cpu_variables)
def check(self):
"""
Checks that data is still sane
"""
for i, gpu_variable in enumerate(self.gpu_variables):
var_sum = pycuda.gpuarray.sum(gpu_variable.data).get()
self.logger.debug(f"Data {i} with size [{gpu_variable.nx} x {gpu_variable.ny}] "
+ f"has average {var_sum / (gpu_variable.nx * gpu_variable.ny)}")
if np.isnan(var_sum):
raise ValueError("Data contains NaN values!")
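A hedged end-to-end sketch of the CUDA subclass, assuming a working PyCUDA installation: the constructor wraps each CPU variable in an Array2D, and check() sums every variable on the device to flag NaNs:

import numpy as np
import pycuda.autoinit  # creates a CUDA context on the default device (noqa: F401)
import pycuda.driver as cuda
from GPUSimulators.common.arrays.cuda import ArakawaA2D

stream = cuda.Stream()
h = np.ones((8, 16), dtype=np.float32)  # one conserved variable with shape (ny, nx)

grid = ArakawaA2D(stream, nx=16, ny=8, halo_x=2, halo_y=2, cpu_variables=[h])
gpu_h = grid[0]   # __getitem__ returns the underlying Array2D
grid.check()      # logs each variable's average; raises ValueError if a sum is NaN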

View File

@@ -1,13 +1,13 @@
import logging
import numpy as np
import pycuda.gpuarray
import pycuda.driver as cuda
from pycuda.tools import PageLockedMemoryPool
from GPUSimulators.common.arrays.array2d import BaseArray2D
class CudaArray2D:
class CudaArray2D(BaseArray2D):
"""
Class that holds 2D CUDA data
"""
@@ -17,40 +17,17 @@ class CudaArray2D:
        Uploads initial data to the CUDA device
        """
        self.logger = logging.getLogger(__name__)

        self.nx = nx
        self.ny = ny
        self.x_halo = x_halo
        self.y_halo = y_halo

        nx_halo = nx + 2 * x_halo
        ny_halo = ny + 2 * y_halo
        super().__init__(nx, ny, x_halo, y_halo, cpu_data)

        # self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
        # Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
        self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype)
        self.data = pycuda.gpuarray.zeros((self.ny_halo, self.nx_halo), dtype)

        # For returning to download
        self.memorypool = PageLockedMemoryPool()

        # If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        # Make sure data is in proper format
        if cpu_data.shape != (ny_halo, nx_halo) and cpu_data.shape != (self.ny, self.nx):
            raise ValueError(
                f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.ny, self.nx))} / {str((ny_halo, nx_halo))}")
        if cpu_data.itemsize != 4:
            raise ValueError("Wrong size of data type")
        if np.isfortran(cpu_data):
            raise TypeError("Wrong datatype (Fortran, expected C)")

        # Create a copy object from host to device
        x = (nx_halo - cpu_data.shape[1]) // 2
        y = (ny_halo - cpu_data.shape[0]) // 2
        x = (self.nx_halo - cpu_data.shape[1]) // 2
        y = (self.ny_halo - cpu_data.shape[0]) // 2
        self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]])
        # self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)

View File

@@ -1,12 +1,12 @@
import logging
import numpy as np
import pycuda.gpuarray
import pycuda.driver as cuda
from pycuda.tools import PageLockedMemoryPool
from GPUSimulators.common.arrays.array3d import BaseArray3D
class CudaArray3D:
class CudaArray3D(BaseArray3D):
"""
Class that holds 3D data
"""
@@ -16,49 +16,24 @@ class CudaArray3D:
        Uploads initial data to the CL device
        """
        self.logger = logging.getLogger(__name__)

        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.x_halo = x_halo
        self.y_halo = y_halo
        self.z_halo = z_halo

        nx_halo = nx + 2 * x_halo
        ny_halo = ny + 2 * y_halo
        nz_halo = nz + 2 * z_halo
        super().__init__(nx, ny, nz, x_halo, y_halo, z_halo, cpu_data)

        # self.logger.debug("Allocating [%dx%dx%d] buffer", self.nx, self.ny, self.nz)
        # Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
        self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype)
        self.data = pycuda.gpuarray.zeros((self.nz_halo, self.ny_halo, self.nx_halo), dtype)

        # For returning to download
        self.memorypool = PageLockedMemoryPool()

        # If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        # Make sure data is in proper format
        if (cpu_data.shape != (nz_halo, ny_halo, nx_halo)
                and cpu_data.shape != (self.nz, self.ny, self.nx)):
            raise ValueError(f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.nz, self.ny, self.nx))} / {str((nz_halo, ny_halo, nx_halo))}")
        if cpu_data.itemsize != 4:
            raise ValueError("Wrong size of data type")
        if np.isfortran(cpu_data):
            raise TypeError("Wrong datatype (Fortran, expected C)")

        # Create a copy object from host to device
        copy = cuda.Memcpy3D()
        copy.set_src_host(cpu_data)
        copy.set_dst_device(self.data.gpudata)

        # Set offsets of destination
        x_offset = (nx_halo - cpu_data.shape[2]) // 2
        y_offset = (ny_halo - cpu_data.shape[1]) // 2
        z_offset = (nz_halo - cpu_data.shape[0]) // 2
        x_offset = (self.nx_halo - cpu_data.shape[2]) // 2
        y_offset = (self.ny_halo - cpu_data.shape[1]) // 2
        z_offset = (self.nz_halo - cpu_data.shape[0]) // 2
        copy.dst_x_in_bytes = x_offset * self.data.strides[1]
        copy.dst_y = y_offset
        copy.dst_z = z_offset