mirror of
https://github.com/smyalygames/FiniteVolumeGPU_HIP.git
synced 2025-05-18 14:34:12 +02:00
update streams with hip-python
This commit is contained in:
parent
5511950c65
commit
e2b1281f5b
@ -382,8 +382,10 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
self.full_exchange()
|
self.full_exchange()
|
||||||
|
|
||||||
#nvtx.mark("sync start", color="blue")
|
#nvtx.mark("sync start", color="blue")
|
||||||
self.sim.stream.synchronize()
|
#self.sim.stream.synchronize()
|
||||||
self.sim.internal_stream.synchronize()
|
#self.sim.internal_stream.synchronize()
|
||||||
|
hip_check(hip.hipStreamSynchronize(self.sim.stream))
|
||||||
|
hip_check(hip.hipStreamSynchronize(self.sim.internal_stream))
|
||||||
#nvtx.mark("sync end", color="blue")
|
#nvtx.mark("sync end", color="blue")
|
||||||
|
|
||||||
self.profiling_data_mpi["n_time_steps"] += 1
|
self.profiling_data_mpi["n_time_steps"] += 1
|
||||||
@ -433,7 +435,8 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
if self.south is not None:
|
if self.south is not None:
|
||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], asynch=True, extent=self.read_s)
|
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_s[k,:,:], asynch=True, extent=self.read_s)
|
||||||
self.sim.stream.synchronize()
|
#self.sim.stream.synchronize()
|
||||||
|
hip_check(hip.hipStreamSynchronize(self.sim.stream))
|
||||||
|
|
||||||
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
|
||||||
|
|
||||||
@ -488,7 +491,8 @@ class MPISimulator(Simulator.BaseSimulator):
|
|||||||
if self.west is not None:
|
if self.west is not None:
|
||||||
for k in range(self.nvars):
|
for k in range(self.nvars):
|
||||||
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w)
|
self.sim.u0[k].download(self.sim.stream, cpu_data=self.out_w[k,:,:], asynch=True, extent=self.read_w)
|
||||||
self.sim.stream.synchronize()
|
#self.sim.stream.synchronize()
|
||||||
|
hip_check(hip.hipStreamSynchronize(self.sim.stream))
|
||||||
|
|
||||||
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
|
self.profiling_data_mpi["end"]["t_mpi_halo_exchange_download"] += time.time()
|
||||||
|
|
||||||
|
@ -24,7 +24,8 @@ import logging
|
|||||||
from GPUSimulators import Simulator, CudaContext
|
from GPUSimulators import Simulator, CudaContext
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import pycuda.driver as cuda
|
#import pycuda.driver as cuda
|
||||||
|
from hip import hip, hiprtc
|
||||||
|
|
||||||
import time
|
import time
|
||||||
|
|
||||||
@ -33,12 +34,27 @@ class SHMEMGrid(object):
|
|||||||
Class which represents an SHMEM grid of GPUs. Facilitates easy communication between
|
Class which represents an SHMEM grid of GPUs. Facilitates easy communication between
|
||||||
neighboring subdomains in the grid. Contains one CUDA context per subdomain.
|
neighboring subdomains in the grid. Contains one CUDA context per subdomain.
|
||||||
"""
|
"""
|
||||||
|
def hip_check(call_result):
    """Unpack the result of a HIP / HIPRTC call, raising on a failure status.

    ``call_result`` is indexed like a sequence: element 0 is taken as the
    status code and everything after it as the payload.

    Returns:
        The lone payload value when exactly one value follows the status;
        otherwise the slice of all remaining values (empty when the call
        produced nothing besides its status).

    Raises:
        RuntimeError: with ``str(status)`` as the message, when the status is
            a ``hip.hipError_t`` other than ``hipSuccess`` or a
            ``hiprtc.hiprtcResult`` other than ``HIPRTC_SUCCESS``.
    """
    status = call_result[0]
    payload = call_result[1:]
    # Collapse a one-element payload so callers get the bare value back.
    value = payload[0] if len(payload) == 1 else payload

    # Each status family has its own success constant; a status of any other
    # type is not treated as an error.
    if isinstance(status, hip.hipError_t):
        failed = status != hip.hipError_t.hipSuccess
    elif isinstance(status, hiprtc.hiprtcResult):
        failed = status != hiprtc.hiprtcResult.HIPRTC_SUCCESS
    else:
        failed = False

    if failed:
        raise RuntimeError(str(status))
    return value
|
||||||
|
|
||||||
def __init__(self, ngpus=None, ndims=2):
|
def __init__(self, ngpus=None, ndims=2):
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
cuda.init(flags=0)
|
#cuda.init(flags=0)
|
||||||
self.logger.info("Initializing CUDA")
|
self.logger.info("Initializing HIP")
|
||||||
num_cuda_devices = cuda.Device.count()
|
#num_cuda_devices = cuda.Device.count()
|
||||||
|
num_cuda_devices = hip_check(hip.hipGetDeviceCount())
|
||||||
|
|
||||||
if ngpus is None:
|
if ngpus is None:
|
||||||
ngpus = num_cuda_devices
|
ngpus = num_cuda_devices
|
||||||
@ -357,7 +373,9 @@ class SHMEMSimulatorGroup(object):
|
|||||||
for k in range(self.nvars[i]):
|
for k in range(self.nvars[i]):
|
||||||
# XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the south)
|
# XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the south)
|
||||||
self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.s[i][k,:,:], extent=self.read_s[i])
|
self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.s[i][k,:,:], extent=self.read_s[i])
|
||||||
self.sims[i].stream.synchronize()
|
#self.sims[i].stream.synchronize()
|
||||||
|
hip_check(hip.hipStreamSynchronize(self.sims[i].stream))
|
||||||
|
|
||||||
|
|
||||||
def ns_upload(self, i):
|
def ns_upload(self, i):
|
||||||
#Upload to the GPU
|
#Upload to the GPU
|
||||||
@ -378,7 +396,8 @@ class SHMEMSimulatorGroup(object):
|
|||||||
for k in range(self.nvars[i]):
|
for k in range(self.nvars[i]):
|
||||||
# XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the west)
|
# XXX: Unnecessary global sync (only need to sync with neighboring subdomain to the west)
|
||||||
self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.w[i][k,:,:], extent=self.read_w[i])
|
self.sims[i].u0[k].download(self.sims[i].stream, cpu_data=self.w[i][k,:,:], extent=self.read_w[i])
|
||||||
self.sims[i].stream.synchronize()
|
#self.sims[i].stream.synchronize()
|
||||||
|
hip_check(hip.hipStreamSynchronize(self.sims[i].stream))
|
||||||
|
|
||||||
def ew_upload(self, i):
|
def ew_upload(self, i):
|
||||||
#Upload to the GPU
|
#Upload to the GPU
|
||||||
|
Loading…
x
Reference in New Issue
Block a user