Bugfix common download / upload

André R. Brodtkorb 2018-11-21 12:03:52 +01:00
parent a3f063267a
commit 6b7de7b755


@@ -308,31 +308,9 @@ class CudaArray2D:
         assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"

         #Create copy object from host to device
-        copy = cuda.Memcpy2D()
-        copy.set_src_host(cpu_data)
-        copy.set_dst_device(self.data.gpudata)
-
-        #Set offsets of upload in destination
-        # This handles the cases where cpu_data contains ghost cell values
-        # and also when it does not
-        x_offset = (nx_halo - cpu_data.shape[1]) // 2
-        y_offset = (ny_halo - cpu_data.shape[0]) // 2
-        copy.dst_x_in_bytes = x_offset*self.data.strides[1]
-        copy.dst_y = y_offset
-
-        #Set destination pitch
-        copy.dst_pitch = self.data.strides[0]
-
-        #Set width in bytes to copy for each row and
-        #number of rows to copy
-        width = max(self.nx, cpu_data.shape[1])
-        height = max(self.ny, cpu_data.shape[0])
-        copy.width_in_bytes = width*cpu_data.itemsize
-        copy.height = height
-
-        #Perform the copy
-        copy(stream)
+        x = (nx_halo - cpu_data.shape[1]) // 2
+        y = (ny_halo - cpu_data.shape[0]) // 2
+        self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]])

         #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
@@ -344,19 +322,25 @@ class CudaArray2D:
     """
     Enables downloading data from GPU to Python
     """
-    def download(self, stream, async=False, extent=None):
-        if (extent == None):
+    def download(self, stream, cpu_data=None, async=False, extent=None):
+        if (extent is None):
             x = self.x_halo
             y = self.y_halo
             nx = self.nx
             ny = self.ny
         else:
             x, y, nx, ny = extent
-        #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
-        #Allocate host memory
-        #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
-        cpu_data = np.empty((ny, nx), dtype=np.float32)
+
+        if (cpu_data is None):
+            #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
+            #Allocate host memory
+            #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
+            cpu_data = np.empty((ny, nx), dtype=np.float32)
+
+        assert nx == cpu_data.shape[1]
+        assert ny == cpu_data.shape[0]
+        assert x+nx <= self.nx + 2*self.x_halo
+        assert y+ny <= self.ny + 2*self.y_halo

         #Create copy object from device to host
         copy = cuda.Memcpy2D()
@@ -380,8 +364,8 @@ class CudaArray2D:
         return cpu_data

-    def upload(self, cpu_data, stream, extent=None):
-        if (extent == None):
+    def upload(self, stream, cpu_data, extent=None):
+        if (extent is None):
             x = self.x_halo
             y = self.y_halo
             nx = self.nx
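Note that upload()'s parameters are reordered from (cpu_data, stream) to (stream, cpu_data), making the signature consistent with download(), so existing positional call sites have to be updated. A before/after sketch (arr, stream and cpu_data as illustrative names, as above):

    # Before this commit:
    #   arr.upload(cpu_data, stream)

    # After, stream first, matching download():
    arr.upload(stream, cpu_data)

    # upload() takes the same extent=[x, y, nx, ny] form as download(),
    # which is what the padded-upload path above now relies on:
    arr.upload(stream, cpu_data,
               extent=[arr.x_halo, arr.y_halo, cpu_data.shape[1], cpu_data.shape[0]])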