feat(mpi): add hip version of MPISimulator

Anthony Berg 2025-07-03 12:21:44 +02:00
parent ebaad2eaa6
commit a28c47d15f
5 changed files with 81 additions and 24 deletions

View File

@@ -1,2 +1,10 @@
from os import environ

__env_name = 'GPU_LANG'
if __env_name in environ and environ.get(__env_name).lower() == "cuda":
    from .cuda_simulator import CudaMPISimulator as MPISimulator
else:
    from .hip_simulator import HIPMPISimulator as MPISimulator

from .grid import MPIGrid
from .simulator import MPISimulator
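Note on the selection logic above: the backend is fixed once, at import time, by the GPU_LANG environment variable; "cuda" picks the PyCUDA-backed simulator, anything else (or an unset variable) falls back to the HIP variant. A minimal usage sketch, assuming the package is importable as GPUSimulators.mpi (the actual package path is not shown in this diff):

import os

# GPU_LANG must be set before the package is imported, because the choice is made in __init__.py.
os.environ["GPU_LANG"] = "cuda"

from GPUSimulators.mpi import MPIGrid, MPISimulator  # hypothetical path; MPISimulator resolves to CudaMPISimulator here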

View File

@@ -0,0 +1,29 @@
import numpy as np
import pycuda.driver as cuda

from .simulator import BaseMPISimulator


class CudaMPISimulator(BaseMPISimulator):
    def __init__(self, sim, grid):
        super().__init__(sim, grid)

    def __create_pagelocked_memory(self):
        self.in_e = cuda.pagelocked_empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])),
                                          dtype=np.float32)  # np.empty((self.nvars, self.read_e[3], self.read_e[2]), dtype=np.float32)
        self.in_w = cuda.pagelocked_empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])),
                                          dtype=np.float32)  # np.empty((self.nvars, self.read_w[3], self.read_w[2]), dtype=np.float32)
        self.in_n = cuda.pagelocked_empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])),
                                          dtype=np.float32)  # np.empty((self.nvars, self.read_n[3], self.read_n[2]), dtype=np.float32)
        self.in_s = cuda.pagelocked_empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])),
                                          dtype=np.float32)  # np.empty((self.nvars, self.read_s[3], self.read_s[2]), dtype=np.float32)

        # Allocate data for sending
        self.out_e = cuda.pagelocked_empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])),
                                           dtype=np.float32)  # np.empty_like(self.in_e)
        self.out_w = cuda.pagelocked_empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])),
                                           dtype=np.float32)  # np.empty_like(self.in_w)
        self.out_n = cuda.pagelocked_empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])),
                                           dtype=np.float32)  # np.empty_like(self.in_n)
        self.out_s = cuda.pagelocked_empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])),
                                           dtype=np.float32)  # np.empty_like(self.in_s)
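cuda.pagelocked_empty allocates pinned (page-locked) host memory that the CUDA driver can DMA directly, so the halo buffers can be filled with asynchronous device-to-host copies instead of blocking ones. A self-contained sketch of that difference, assuming a working PyCUDA installation (buffer names and shapes are illustrative only, not taken from this commit):

import numpy as np
import pycuda.autoinit  # noqa: F401  -- creates a CUDA context for this example
import pycuda.driver as cuda

stream = cuda.Stream()
pinned = cuda.pagelocked_empty((3, 16, 64), dtype=np.float32)  # page-locked host buffer
dev_buf = cuda.mem_alloc(pinned.nbytes)                        # illustrative device allocation

# With pinned memory this copy can overlap other work queued on the stream;
# with an ordinary pageable numpy array the same call degrades to a synchronous copy.
cuda.memcpy_dtoh_async(pinned, dev_buf, stream)
stream.synchronize()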

View File

@@ -1,3 +1,6 @@
import logging
import numpy as np
from mpi4py import MPI

View File

@@ -0,0 +1,28 @@
import numpy as np

from .simulator import BaseMPISimulator


class HIPMPISimulator(BaseMPISimulator):
    def __init__(self, sim, grid):
        super().__init__(sim, grid)

    def __create_pagelocked_memory(self):
        self.in_e = np.empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])),
                             dtype=np.float32)  # np.empty((self.nvars, self.read_e[3], self.read_e[2]), dtype=np.float32)
        self.in_w = np.empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])),
                             dtype=np.float32)  # np.empty((self.nvars, self.read_w[3], self.read_w[2]), dtype=np.float32)
        self.in_n = np.empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])),
                             dtype=np.float32)  # np.empty((self.nvars, self.read_n[3], self.read_n[2]), dtype=np.float32)
        self.in_s = np.empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])),
                             dtype=np.float32)  # np.empty((self.nvars, self.read_s[3], self.read_s[2]), dtype=np.float32)

        # Allocate data for sending
        self.out_e = np.empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])),
                              dtype=np.float32)  # np.empty_like(self.in_e)
        self.out_w = np.empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])),
                              dtype=np.float32)  # np.empty_like(self.in_w)
        self.out_n = np.empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])),
                              dtype=np.float32)  # np.empty_like(self.in_n)
        self.out_s = np.empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])),
                              dtype=np.float32)  # np.empty_like(self.in_s)
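Unlike the CUDA variant, the HIP variant allocates ordinary pageable numpy arrays rather than pinned host memory. In both cases the halo buffers are plain numpy arrays on the host side, so mpi4py can use them directly in the exchange; a hedged sketch of that pattern (the partner rank and tag below are made up, not taken from this commit):

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
partner = comm.rank ^ 1                          # illustrative pairing (assumes an even number of ranks)
out_e = np.empty((3, 16, 4), dtype=np.float32)   # send buffer for the east halo
in_e = np.empty_like(out_e)                      # receive buffer for the east halo

# Non-blocking send/receive through the arrays' buffer interface.
reqs = [comm.Isend(out_e, dest=partner, tag=0),
        comm.Irecv(in_e, source=partner, tag=0)]
MPI.Request.Waitall(reqs)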

View File

@@ -24,12 +24,10 @@ import numpy as np
from mpi4py import MPI
import time

import pycuda.driver as cuda

from GPUSimulators.simulator import BaseSimulator, BoundaryCondition


class MPISimulator(BaseSimulator):
class BaseMPISimulator(BaseSimulator):
    """
    Class which handles communication between simulators on different MPI nodes
    """
@@ -116,6 +114,16 @@ class MPISimulator(BaseSimulator):
        self.write_n = self.read_n + np.array([0, gc_y, 0, 0])
        self.write_s = self.read_s - np.array([0, gc_y, 0, 0])

        self.in_e = None
        self.in_w = None
        self.in_n = None
        self.in_s = None

        # Allocate data for sending
        self.out_e = None
        self.out_w = None
        self.out_n = None
        self.out_s = None

        self.__create_pagelocked_memory()

        self.logger.debug(f"Simulator rank {self.grid.comm.rank} initialized on {MPI.Get_processor_name()}")
@@ -301,23 +309,4 @@ class MPISimulator(BaseSimulator):
        whilst north/south only transfer internal cells
        Reuses the width/height defined in the read-extents above
        """
        self.in_e = cuda.pagelocked_empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])),
                                          dtype=np.float32)  # np.empty((self.nvars, self.read_e[3], self.read_e[2]), dtype=np.float32)
        self.in_w = cuda.pagelocked_empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])),
                                          dtype=np.float32)  # np.empty((self.nvars, self.read_w[3], self.read_w[2]), dtype=np.float32)
        self.in_n = cuda.pagelocked_empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])),
                                          dtype=np.float32)  # np.empty((self.nvars, self.read_n[3], self.read_n[2]), dtype=np.float32)
        self.in_s = cuda.pagelocked_empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])),
                                          dtype=np.float32)  # np.empty((self.nvars, self.read_s[3], self.read_s[2]), dtype=np.float32)

        # Allocate data for sending
        self.out_e = cuda.pagelocked_empty((int(self.nvars), int(self.read_e[3]), int(self.read_e[2])),
                                           dtype=np.float32)  # np.empty_like(self.in_e)
        self.out_w = cuda.pagelocked_empty((int(self.nvars), int(self.read_w[3]), int(self.read_w[2])),
                                           dtype=np.float32)  # np.empty_like(self.in_w)
        self.out_n = cuda.pagelocked_empty((int(self.nvars), int(self.read_n[3]), int(self.read_n[2])),
                                           dtype=np.float32)  # np.empty_like(self.in_n)
        self.out_s = cuda.pagelocked_empty((int(self.nvars), int(self.read_s[3]), int(self.read_s[2])),
                                           dtype=np.float32)  # np.empty_like(self.in_s)

        raise NotImplementedError("This function needs to be implemented in a subclass.")
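The base class now initialises the halo buffers to None and defers their allocation to __create_pagelocked_memory, which here only raises NotImplementedError so that CudaMPISimulator and HIPMPISimulator supply the backend-specific allocation. One caveat with the hook as written: because the name starts with two underscores, Python name mangling resolves the call in BaseMPISimulator.__init__ to the base class's own method, so a subclass method of the same name is never invoked; a single leading underscore gives the intended dispatch. A minimal sketch of the pattern with illustrative class names (not the commit's code):

class Base:
    def __init__(self):
        # With a double-underscore name this call would be mangled to
        # self._Base__create_buffers() and always hit the base version below.
        self._create_buffers()

    def _create_buffers(self):
        raise NotImplementedError("This function needs to be implemented in a subclass.")


class CudaBackend(Base):
    def _create_buffers(self):
        self.buffers = "pinned host memory"  # stand-in for the real allocation


CudaBackend()  # dispatches to the subclass hook as intended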