update streams with hip-python

This commit is contained in:
Hicham Agueny 2024-02-26 12:55:34 +01:00
parent 5511950c65
commit e2b1281f5b
2 changed files with 35 additions and 12 deletions

View File

@ -382,8 +382,10 @@ class MPISimulator(Simulator.BaseSimulator):
self.full_exchange() self.full_exchange()
#nvtx.mark("sync start", color="blue") #nvtx.mark("sync start", color="blue")
self.sim.stream.synchronize() #self.sim.stream.synchronize()
self.sim.internal_stream.synchronize() #self.sim.internal_stream.synchronize()
hip_check(hip.hipStreamSynchronize(self.sim.stream))
hip_check(hip.hipStreamSynchronize(self.sim.internal_stream))
#nvtx.mark("sync end", color="blue") #nvtx.mark("sync end", color="blue")
self.profiling_data_mpi["n_time_steps"] += 1 self.profiling_data_mpi["n_time_steps"] += 1
@ -433,7 +435,8 @@ class MPISimulator(Simulator.BaseSimulator):
if self.south is not None: if self.south is not None:
for k in range(self.nvars): for k in range(self.nvars):
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], asynch=True, extent=self.read_s) self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], asynch=True, extent=self.read_s)
self.sim.stream.synchronize() #self.sim.stream.synchronize()
hip_check(hip.hipStreamSynchronize(self.sim.stream))
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time() self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
@ -488,7 +491,8 @@ class MPISimulator(Simulator.BaseSimulator):
if self.west is not None: if self.west is not None:
for k in range(self.nvars): for k in range(self.nvars):
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w) self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w)
self.sim.stream.synchronize() #self.sim.stream.synchronize()
hip_check(hip.hipStreamSynchronize(self.sim.stream))
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time() self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()

View File

@ -24,7 +24,8 @@ import logging
from GPUSimulators import Simulator, CudaContext from GPUSimulators import Simulator, CudaContext
import numpy as np import numpy as np
import pycuda.driver as cuda #import pycuda.driver as cuda
from hip import hip, hiprtc
import time import time
@ -33,12 +34,27 @@ class SHMEMGrid(object):
Class which represents an SHMEM grid of GPUs. Facilitates easy communication between Class which represents an SHMEM grid of GPUs. Facilitates easy communication between
neighboring subdomains in the grid. Contains one CUDA context per subdomain. neighboring subdomains in the grid. Contains one CUDA context per subdomain.
""" """
def hip_check(call_result):
    """Unwrap the tuple returned by a hip-python call, raising on failure.

    The first element of ``call_result`` is treated as the status code and
    the remaining elements as the call's output values.

    Args:
        call_result: Indexable sequence whose element 0 is the status and
            whose remaining elements are the returned values.

    Returns:
        The single remaining value when exactly one value follows the
        status; otherwise the (possibly empty) slice of remaining values.

    Raises:
        RuntimeError: If the status is a ``hip.hipError_t`` other than
            ``hipSuccess``, or a ``hiprtc.hiprtcResult`` other than
            ``HIPRTC_SUCCESS``. The message is ``str(status)``.
    """
    # NOTE(review): in the diff this helper appears right after the class
    # docstring; if it lives at class scope, bare `hip_check(...)` calls from
    # methods would not resolve -- confirm it is reachable at module level.
    status = call_result[0]
    payload = call_result[1:]

    # Either status family signals failure by differing from its own
    # success constant; a status of any other type is never an error.
    if (
        isinstance(status, hip.hipError_t)
        and status != hip.hipError_t.hipSuccess
    ) or (
        isinstance(status, hiprtc.hiprtcResult)
        and status != hiprtc.hiprtcResult.HIPRTC_SUCCESS
    ):
        raise RuntimeError(str(status))

    # Collapse a one-element payload to the bare value for caller convenience.
    return payload[0] if len(payload) == 1 else payload
def __init__(self, ngpus=None, ndims=2): def __init__(self, ngpus=None, ndims=2):
self.logger = logging.getLogger(__name__) self.logger = logging.getLogger(__name__)
cuda.init(flags=0) #cuda.init(flags=0)
self.logger.info("Initializing CUDA") self.logger.info("Initializing HIP")
num_cuda_devices = cuda.Device.count() #num_cuda_devices = cuda.Device.count()
num_cuda_devices = hip_check(hip.hipGetDeviceCount())
if ngpus is None: if ngpus is None:
ngpus = num_cuda_devices ngpus = num_cuda_devices
@ -357,7 +373,9 @@ class SHMEMSimulatorGroup(object):
for k in range(self.nvars[i]): for k in range(self.nvars[i]):
# XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the south) # XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the south)
self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.s[i][k,:,:], extent=self.read_s[i]) self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.s[i][k,:,:], extent=self.read_s[i])
self.sims[i].stream.synchronize() #self.sims[i].stream.synchronize()
hip_check(hip.hipStreamSynchronize(self.sims[i].stream))
def ns_upload(self, i): def ns_upload(self, i):
#Upload to the GPU #Upload to the GPU
@ -378,7 +396,8 @@ class SHMEMSimulatorGroup(object):
for k in range(self.nvars[i]): for k in range(self.nvars[i]):
# XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the west) # XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the west)
self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.w[i][k,:,:], extent=self.read_w[i]) self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.w[i][k,:,:], extent=self.read_w[i])
self.sims[i].stream.synchronize() #self.sims[i].stream.synchronize()
hip_check(hip.hipStreamSynchronize(self.sims[i].stream))
def ew_upload(self, i): def ew_upload(self, i):
#Upload to the GPU #Upload to the GPU