Mirror of https://github.com/smyalygames/FiniteVolumeGPU_HIP.git (synced 2025-05-18 06:24:11 +02:00)
Commit e2b1281f5b (parent 5511950c65): update streams with hip-python

@@ -382,8 +382,10 @@ class MPISimulator(Simulator.BaseSimulator):
         self.full_exchange()

         #nvtx.mark("sync start", color="blue")
-        self.sim.stream.synchronize()
-        self.sim.internal_stream.synchronize()
+        #self.sim.stream.synchronize()
+        #self.sim.internal_stream.synchronize()
+        hip_check(hip.hipStreamSynchronize(self.sim.stream))
+        hip_check(hip.hipStreamSynchronize(self.sim.internal_stream))
         #nvtx.mark("sync end", color="blue")

         self.profiling_data_mpi["n_time_steps"] += 1
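The hunk above replaces pycuda's stream.synchronize() method with explicit hip-python calls wrapped in hip_check(). As a minimal standalone sketch of that pattern, not part of the commit, assuming the hip-python package is installed and a HIP device is visible (the compact hip_check variant here only handles hip errors):

from hip import hip

def hip_check(call_result):
    # hip-python calls return a tuple (err, result...); raise on any failure code
    err, *result = call_result
    if err != hip.hipError_t.hipSuccess:
        raise RuntimeError(str(err))
    return result[0] if len(result) == 1 else result

stream = hip_check(hip.hipStreamCreate())      # stands in for self.sim.stream
hip_check(hip.hipStreamSynchronize(stream))    # blocks until all work queued on the stream has finished
hip_check(hip.hipStreamDestroy(stream))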
@@ -433,7 +435,8 @@ class MPISimulator(Simulator.BaseSimulator):
         if self.south is not None:
             for k in range(self.nvars):
                 self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], asynch=True, extent=self.read_s)
-            self.sim.stream.synchronize()
+            #self.sim.stream.synchronize()
+            hip_check(hip.hipStreamSynchronize(self.sim.stream))

         self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
@@ -488,7 +491,8 @@ class MPISimulator(Simulator.BaseSimulator):
         if self.west is not None:
             for k in range(self.nvars):
                 self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w)
-            self.sim.stream.synchronize()
+            #self.sim.stream.synchronize()
+            hip_check(hip.hipStreamSynchronize(self.sim.stream))

         self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
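The two download hunks follow the same ordering: queue an asynchronous device-to-host copy of the halo data on the simulator's stream, then block on that stream before the host buffer is handed to MPI. Below is a hedged illustration of that ordering with raw hip-python calls; the buffer names and sizes are invented, the repo's u0[k].download() wrapper is not reproduced here, and a truly asynchronous copy would additionally need pinned host memory:

import numpy as np
from hip import hip

def hip_check(call_result):
    err, *result = call_result
    if err != hip.hipError_t.hipSuccess:
        raise RuntimeError(str(err))
    return result[0] if len(result) == 1 else result

stream = hip_check(hip.hipStreamCreate())
halo_host = np.zeros((4, 128), dtype=np.float32)        # host-side halo buffer (plays the role of self.out_s)
halo_dev = hip_check(hip.hipMalloc(halo_host.nbytes))   # device buffer standing in for a slice of u0[k]

# queue the copy on the stream, then wait: the host buffer is only valid after the synchronize
hip_check(hip.hipMemcpyAsync(halo_host, halo_dev, halo_host.nbytes,
                             hip.hipMemcpyKind.hipMemcpyDeviceToHost, stream))
hip_check(hip.hipStreamSynchronize(stream))
# ... halo_host can now be passed to the MPI halo-exchange sends ...

hip_check(hip.hipFree(halo_dev))
hip_check(hip.hipStreamDestroy(stream))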
@@ -24,7 +24,8 @@ import logging
 from GPUSimulators import Simulator, CudaContext
 import numpy as np

-import pycuda.driver as cuda
+#import pycuda.driver as cuda
+from hip import hip, hiprtc

 import time
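This hunk swaps the pycuda.driver import for the hip-python bindings; hiprtc is imported alongside hip because the error-checking helper added in the next hunk also handles hiprtc result codes. A small sketch, assuming hip-python and at least one visible device, of what a raw call looks like after the swap and why a helper is wanted:

from hip import hip, hiprtc   # hip-python bindings replacing pycuda.driver

# without a helper, the (error, value) tuple of every call has to be unpacked by hand;
# the hip_check() helper introduced in the next hunk exists to hide exactly this
err, num_devices = hip.hipGetDeviceCount()
if err != hip.hipError_t.hipSuccess:
    raise RuntimeError(str(err))
print(f"found {num_devices} HIP device(s)")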
@@ -33,12 +34,27 @@ class SHMEMGrid(object):
     Class which represents an SHMEM grid of GPUs. Facilitates easy communication between
     neighboring subdomains in the grid. Contains one CUDA context per subdomain.
     """
+    def hip_check(call_result):
+        err = call_result[0]
+        result = call_result[1:]
+        if len(result) == 1:
+            result = result[0]
+        if isinstance(err, hip.hipError_t) and err != hip.hipError_t.hipSuccess:
+            raise RuntimeError(str(err))
+        elif (
+            isinstance(err, hiprtc.hiprtcResult)
+            and err != hiprtc.hiprtcResult.HIPRTC_SUCCESS
+        ):
+            raise RuntimeError(str(err))
+        return result
+
     def __init__(self, ngpus=None, ndims=2):
         self.logger = logging.getLogger(__name__)

-        cuda.init(flags=0)
-        self.logger.info("Initializing CUDA")
-        num_cuda_devices = cuda.Device.count()
+        #cuda.init(flags=0)
+        self.logger.info("Initializing HIP")
+        #num_cuda_devices = cuda.Device.count()
+        num_cuda_devices = hip_check(hip.hipGetDeviceCount())

         if ngpus is None:
             ngpus = num_cuda_devices
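The hip_check() helper added here is the standard unwrapping idiom for hip-python: every binding returns a tuple whose first element is an error code (hipError_t, or hiprtcResult for the runtime compiler), optionally followed by result values; the helper raises on failure and strips the error code off. A hedged usage sketch with the same helper at module level (the calls are real hip-python APIs, the variable names are only illustrative):

from hip import hip, hiprtc

def hip_check(call_result):
    err = call_result[0]
    result = call_result[1:]
    if len(result) == 1:
        result = result[0]
    if isinstance(err, hip.hipError_t) and err != hip.hipError_t.hipSuccess:
        raise RuntimeError(str(err))
    elif isinstance(err, hiprtc.hiprtcResult) and err != hiprtc.hiprtcResult.HIPRTC_SUCCESS:
        raise RuntimeError(str(err))
    return result

count = hip_check(hip.hipGetDeviceCount())   # (err, int)  -> int
ptr = hip_check(hip.hipMalloc(1024))         # (err, ptr)  -> device pointer
hip_check(hip.hipFree(ptr))                  # (err,)      -> empty tuple, nothing to unwrap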
@@ -357,7 +373,9 @@ class SHMEMSimulatorGroup(object):
             for k in range(self.nvars[i]):
                 # XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the south)
                 self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.s[i][k,:,:], extent=self.read_s[i])
-            self.sims[i].stream.synchronize()
+            #self.sims[i].stream.synchronize()
+            hip_check(hip.hipStreamSynchronize(self.sims[i].stream))
+

     def ns_upload(self, i):
         #Upload to the GPU
@@ -378,7 +396,8 @@ class SHMEMSimulatorGroup(object):
             for k in range(self.nvars[i]):
                 # XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the west)
                 self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.w[i][k,:,:], extent=self.read_w[i])
-            self.sims[i].stream.synchronize()
+            #self.sims[i].stream.synchronize()
+            hip_check(hip.hipStreamSynchronize(self.sims[i].stream))

     def ew_upload(self, i):
         #Upload to the GPU