mirror of
				https://github.com/smyalygames/FiniteVolumeGPU.git
				synced 2025-10-31 20:27:40 +01:00 
			
		
		
		
	fix(common): handle copying of arrays on HIP
This commit is contained in:
		
							parent
							
								
									30aaccba91
								
							
						
					
					
						commit
						5c60978614
					
				| @ -3,6 +3,9 @@ from os import environ | ||||
| __env_name = 'GPU_LANG' | ||||
| 
 | ||||
| if __env_name in environ and environ.get(__env_name).lower() == "cuda": | ||||
|     from .cuda import * | ||||
|     from .cuda.arkawa2d import CudaArakawaA2D as ArakawaA2D | ||||
|     from .cuda.array2d import CudaArray2D as Array2D | ||||
|     from .cuda.array3d import CudaArray3D as Array3D | ||||
| else: | ||||
|     from .hip import * | ||||
|     from .hip.arkawa2d import HIPArakawaA2D as ArakawaA2D | ||||
|     from .hip.array2d import HIPArray2D as Array2D | ||||
|  | ||||
| @ -37,5 +37,4 @@ class BaseArakawaA2D(object): | ||||
|                 raise IndexError(f"Variable {i} is out of range") | ||||
|             cpu_variables += [self.gpu_variables[i].download(stream, asynch=True)] | ||||
| 
 | ||||
|         # stream.synchronize() | ||||
|         return cpu_variables | ||||
|  | ||||
| @ -40,6 +40,12 @@ class BaseArray2D(object): | ||||
|         if np.isfortran(cpu_data): | ||||
|             raise TypeError("Wrong datatype (Fortran, expected C)") | ||||
| 
 | ||||
    def get_strides(self) -> tuple[int, ...]:
        """
        Gets the number of bytes it takes to move to the next element.

        Returns:
            A tuple of byte strides, one entry per array dimension
            (index 0 is the pitch of a row in bytes).

        Raises:
            NotImplementedError: always; this is an abstract stub that
                every backend subclass must override.
        """
        raise NotImplementedError("This function needs to be implemented in a subclass.")
| 
 | ||||
|     def check(self, x, y, nx, ny, cpu_data): | ||||
|         if nx != cpu_data.shape[1]: | ||||
|             raise ValueError | ||||
|  | ||||
| @ -1,3 +0,0 @@ | ||||
| from .arkawa2d import CudaArakawaA2D as ArakawaA2D | ||||
| from .array2d import CudaArray2D as Array2D | ||||
| from .array3d import CudaArray3D as Array3D | ||||
| @ -108,3 +108,6 @@ class CudaArray2D(BaseArray2D): | ||||
|         copy.height = int(ny) | ||||
| 
 | ||||
|         copy(stream) | ||||
| 
 | ||||
|     def get_strides(self) -> tuple[int, ...]: | ||||
|         return self.data.strides[0] | ||||
|  | ||||
| @ -1,3 +0,0 @@ | ||||
| from .arkawa2d import HIPArakawaA2D as ArakawaA2D | ||||
| from .array2d import HIPArray2D as Array2D | ||||
| # from .array3d import HIPArray3D as Array3D | ||||
| @ -1,7 +1,7 @@ | ||||
| import numpy as np | ||||
| from hip import hip, hipblas | ||||
| 
 | ||||
| from ....common import hip_check | ||||
| from ...hip_check import hip_check | ||||
| from ..arkawa2d import BaseArakawaA2D | ||||
| from .array2d import HIPArray2D | ||||
| 
 | ||||
| @ -13,10 +13,10 @@ def _sum_array(array: HIPArray2D): | ||||
|         array: A HIPArray2D to compute the sum of. | ||||
|     """ | ||||
|     data_h = array.data_h | ||||
|     num_bytes = array.dtype.itemsize | ||||
|     num_bytes = array.num_bytes | ||||
| 
 | ||||
|     result_d = hip_check(hip.hipMalloc(num_bytes)) | ||||
|     result_h = array.dtype.type(0) | ||||
|     result_h = np.zeros(1, dtype=array.dtype) | ||||
| 
 | ||||
|     # Sum the ``data_h`` array using hipblas | ||||
|     handle = hip_check(hipblas.hipblasCreate()) | ||||
|  | ||||
| @ -55,16 +55,15 @@ class HIPArray2D(BaseArray2D): | ||||
|         if cpu_data is None: | ||||
|             # self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny) | ||||
|             # Allocate host memory | ||||
|             cpu_data = np.empty((ny, nx), dtype=self.dtype) | ||||
|             cpu_data = np.zeros((ny, nx), dtype=self.dtype) | ||||
| 
 | ||||
|         self.check(x, y, nx, ny, cpu_data) | ||||
|         copy_args = hip.hip_Memcpy2D(**self.__get_copy_info(x, y, nx, ny, cpu_data, True)) | ||||
| 
 | ||||
|         hip_check(hip.hipMemcpyParam2DAsync(copy_args, stream)) | ||||
| 
 | ||||
|         if not asynch: | ||||
|             hip_check(hip.hipStreamSynchronize(stream)) | ||||
| 
 | ||||
|         hip_check( | ||||
|             hip.hipMemcpyAsync(self.data, cpu_data, self.num_bytes, hip.hipMemcpyKind.hipMemcpyDeviceToHost, stream)) | ||||
| 
 | ||||
|         return cpu_data | ||||
| 
 | ||||
|     def upload(self, stream, cpu_data, extent=None): | ||||
| @ -76,9 +75,58 @@ class HIPArray2D(BaseArray2D): | ||||
|         else: | ||||
|             x, y, nx, ny = extent | ||||
| 
 | ||||
|         self.check(x, y, nx, ny, cpu_data) | ||||
|         copy_param = hip.hip_Memcpy2D(**self.__get_copy_info(x, y, nx, ny, cpu_data)) | ||||
| 
 | ||||
|         # TODO implement non-async to test if it actually works - avoid errors | ||||
|         # Create a copy object from device to host | ||||
|         hip_check(hip.hipMemcpyAsync(self.data, self.data_h, self.num_bytes, hip.hipMemcpyKind.hipMemcpyHostToDevice, | ||||
|                                      stream)) | ||||
|         hip_check(hip.hipMemcpyParam2DAsync(copy_param, stream)) | ||||
| 
 | ||||
|     def get_strides(self) -> tuple[int, ...]: | ||||
|         strides = [] | ||||
|         for i in range(len(self.data_h.shape)): | ||||
|             strides.append(self.data_h.shape[i] * np.float32().itemsize) | ||||
| 
 | ||||
|         return tuple(strides) | ||||
| 
 | ||||
|     def __get_copy_info(self, x, y, nx, ny, host, to_host=False): | ||||
|         self.check(x, y, nx, ny, host) | ||||
| 
 | ||||
|         # Arguments for the host data | ||||
|         src_args = [ | ||||
|             'Host', | ||||
|             0, | ||||
|             0, | ||||
|             hip.hipMemoryType.hipMemoryTypeHost, | ||||
|             host, | ||||
|             host.strides[0] | ||||
| 
 | ||||
|         ] | ||||
|         # Arguments for the device | ||||
|         dst_args = [ | ||||
|             'Device', | ||||
|             int(x) * np.float32().itemsize, | ||||
|             int(y), | ||||
|             hip.hipMemoryType.hipMemoryTypeDevice, | ||||
|             self.data, | ||||
|             self.get_strides()[0], | ||||
|         ] | ||||
| 
 | ||||
|         if to_host: | ||||
|             src_args, dst_args = dst_args, src_args | ||||
| 
 | ||||
|         args = { | ||||
|             'srcXInBytes': src_args[1], | ||||
|             'srcY': src_args[2], | ||||
|             'srcMemoryType': src_args[3], | ||||
|             f'src{src_args[0]}': src_args[4], | ||||
|             'srcPitch': src_args[5], | ||||
| 
 | ||||
|             'dstXInBytes': dst_args[1], | ||||
|             'dstY': dst_args[2], | ||||
|             'dstMemoryType': dst_args[3], | ||||
|             f'dst{dst_args[0]}': dst_args[4], | ||||
|             'dstPitch': dst_args[5], | ||||
| 
 | ||||
|             'WidthInBytes': int(nx) * np.float32().itemsize, | ||||
|             'Height': int(ny) | ||||
|         } | ||||
| 
 | ||||
|         return args | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Anthony Berg
						Anthony Berg