EulerTesting.ipynb now runs on Linux. Simulation results need further validation.

2025-11-29 17:28:03 +01:00 · 2019-06-05 15:30:35 +00:00
parent 4fa14abdff
commit 4ca9581b37
4 changed files with 546093 additions and 497928 deletions
--- a/EulerTesting.ipynb
+++ b/EulerTesting.ipynb
--- a/GPUSimulators/Common.py
+++ b/GPUSimulators/Common.py
@@ -510,7 +510,7 @@ class CudaArray2D:
    """
    Enables downloading data from GPU to Python
    """
-    def download(self, stream, cpu_data=None, async=False, extent=None):
+    def download(self, stream, cpu_data=None, asynch=False, extent=None):
        if (extent is None):
            x = self.x_halo
            y = self.y_halo
@@ -548,7 +548,7 @@ class CudaArray2D:
        copy.height = int(ny)
        copy(stream)
-        if async==False:
+        if asynch==False:
            stream.synchronize()
        return cpu_data
@@ -667,7 +667,7 @@ class CudaArray3D:
    """
    Enables downloading data from GPU to Python
    """
-    def download(self, stream, async=False):
+    def download(self, stream, asynch=False):
        #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
        #Allocate host memory
        #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
@@ -692,7 +692,7 @@ class CudaArray3D:
        copy.depth = self.nz
        copy(stream)
-        if async==False:
+        if asynch==False:
            stream.synchronize()
        return cpu_data
@@ -734,7 +734,7 @@ class ArakawaA2D:
        cpu_variables = []
        for i in variables:
            assert i < len(self.gpu_variables), "Variable {:d} is out of range".format(i)
-            cpu_variables += [self.gpu_variables[i].download(stream, async=True)]
+            cpu_variables += [self.gpu_variables[i].download(stream, asynch=True)]
        stream.synchronize()
        return cpu_variables
--- a/GPUSimulators/EE2D_KP07_dimsplit.py
+++ b/GPUSimulators/EE2D_KP07_dimsplit.py
@@ -105,8 +105,8 @@ class EE2D_KP07_dimsplit (BaseSimulator):
        self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
        dt_x = np.min(self.dx / (np.abs(rho_u/rho) + np.sqrt(gamma*rho)))
        dt_y = np.min(self.dy / (np.abs(rho_v/rho) + np.sqrt(gamma*rho)))
-        dt = min(dt_x, dt_y)
+        self.dt = min(dt_x, dt_y)
-        self.cfl_data.fill(dt, stream=self.stream)
+        self.cfl_data.fill(self.dt, stream=self.stream)
    def substep(self, dt, step_number):
--- a/GPUSimulators/cuda/common.h
+++ b/GPUSimulators/cuda/common.h
@@ -130,6 +130,113 @@ inline __device__ BoundaryCondition getBCWest(int bc_) {
 }
 // West boundary (reflective).
 //
 // Fills the ghost columns on the low-x side of the tile Q by mirroring the
 // first gc_x interior columns about the domain edge. Every copied value is
 // multiplied by `sign` (presumably +1 to copy and -1 to negate -- confirm
 // against the call sites).
 //
 // Template parameters:
 //   w, h       - tile extents; the row loop below strides by h
 //                (presumably h == blockDim.y -- TODO confirm).
 //   gc_x, gc_y - ghost-cell counts that pad the tile in x and y.
 //   sign       - factor applied to each mirrored value.
 // Parameters:
 //   Q          - tile of (h+2*gc_y) x (w+2*gc_x) floats, updated in place.
 //   nx_, ny_   - not used by this function.
 //
 // The mirror offset is derived from the thread's distance to the edge, so
 // every ghost-cell width gc_x is handled, not only gc_x <= 5.
 template<int w, int h, int gc_x, int gc_y, int sign>
 __device__ void bcWestReflective(float Q[h+2*gc_y][w+2*gc_x], 
                                const int nx_, const int ny_) {
    // Column of this thread inside the tile and in the global grid.
    const int i = threadIdx.x + gc_x;
    const int ti = blockDim.x*blockIdx.x + i;

    // Distance (in cells) from the first interior column of the domain.
    const int dist = ti - gc_x;

    // Only the gc_x columns nearest the edge have a mirror image to write.
    if (dist >= 0 && dist < gc_x) {
        for (int j=threadIdx.y; j<h+2*gc_y; j+=h) {
            // Interior column `dist` reflects to the ghost column 2*dist+1 to its left.
            Q[j][i - (2*dist + 1)] = sign*Q[j][i];
        }
    }
 }
 // East boundary (reflective).
 //
 // Fills the ghost columns on the high-x side of the tile Q by mirroring the
 // last gc_x interior columns about the domain edge. Every copied value is
 // multiplied by `sign` (presumably +1 to copy and -1 to negate -- confirm
 // against the call sites).
 //
 // Template parameters:
 //   w, h       - tile extents; the row loop below strides by h
 //                (presumably h == blockDim.y -- TODO confirm).
 //   gc_x, gc_y - ghost-cell counts that pad the tile in x and y.
 //   sign       - factor applied to each mirrored value.
 // Parameters:
 //   Q          - tile of (h+2*gc_y) x (w+2*gc_x) floats, updated in place.
 //   nx_        - number of interior columns of the global domain; the last
 //                interior column has global index nx_ + gc_x - 1.
 //   ny_        - not used by this function.
 //
 // The mirror offset is derived from the thread's distance to the edge, so
 // every ghost-cell width gc_x is handled, not only gc_x <= 5.
 template<int w, int h, int gc_x, int gc_y, int sign>
 __device__ void bcEastReflective(float Q[h+2*gc_y][w+2*gc_x], 
                                const int nx_, const int ny_) {
    // Column of this thread inside the tile and in the global grid.
    const int i = threadIdx.x + gc_x;
    const int ti = blockDim.x*blockIdx.x + i;

    // Distance (in cells) from the last interior column of the domain.
    const int dist = (nx_ + gc_x - 1) - ti;

    // Only the gc_x columns nearest the edge have a mirror image to write.
    if (dist >= 0 && dist < gc_x) {
        for (int j=threadIdx.y; j<h+2*gc_y; j+=h) {
            // Interior column `dist` reflects to the ghost column 2*dist+1 to its right.
            Q[j][i + (2*dist + 1)] = sign*Q[j][i];
        }
    }
 }
 // South boundary (reflective).
 //
 // Fills the ghost rows on the low-y side of the tile Q by mirroring the
 // first gc_y interior rows about the domain edge. Every copied value is
 // multiplied by `sign` (presumably +1 to copy and -1 to negate -- confirm
 // against the call sites).
 //
 // Template parameters:
 //   w, h       - tile extents; the column loop below strides by w
 //                (presumably w == blockDim.x -- TODO confirm).
 //   gc_x, gc_y - ghost-cell counts that pad the tile in x and y.
 //   sign       - factor applied to each mirrored value.
 // Parameters:
 //   Q          - tile of (h+2*gc_y) x (w+2*gc_x) floats, updated in place.
 //   nx_, ny_   - not used by this function.
 //
 // The mirror offset is derived from the thread's distance to the edge, so
 // every ghost-cell width gc_y is handled, not only gc_y <= 5.
 template<int w, int h, int gc_x, int gc_y, int sign>
 __device__ void bcSouthReflective(float Q[h+2*gc_y][w+2*gc_x], 
                                const int nx_, const int ny_) {
    // Row of this thread inside the tile and in the global grid.
    const int j = threadIdx.y + gc_y;
    const int tj = blockDim.y*blockIdx.y + j;

    // Distance (in cells) from the first interior row of the domain.
    const int dist = tj - gc_y;

    // Only the gc_y rows nearest the edge have a mirror image to write.
    if (dist >= 0 && dist < gc_y) {
        for (int i=threadIdx.x; i<w+2*gc_x; i+=w) {
            // Interior row `dist` reflects to the ghost row 2*dist+1 below it.
            Q[j - (2*dist + 1)][i] = sign*Q[j][i];
        }
    }
 }
 // North boundary (reflective).
 //
 // Fills the ghost rows on the high-y side of the tile Q by mirroring the
 // last gc_y interior rows about the domain edge. Every copied value is
 // multiplied by `sign` (presumably +1 to copy and -1 to negate -- confirm
 // against the call sites).
 //
 // Template parameters:
 //   w, h       - tile extents; the column loop below strides by w
 //                (presumably w == blockDim.x -- TODO confirm).
 //   gc_x, gc_y - ghost-cell counts that pad the tile in x and y.
 //   sign       - factor applied to each mirrored value.
 // Parameters:
 //   Q          - tile of (h+2*gc_y) x (w+2*gc_x) floats, updated in place.
 //   nx_        - not used by this function.
 //   ny_        - number of interior rows of the global domain; the last
 //                interior row has global index ny_ + gc_y - 1.
 //
 // The mirror offset is derived from the thread's distance to the edge, so
 // every ghost-cell width gc_y is handled, not only gc_y <= 5.
 template<int w, int h, int gc_x, int gc_y, int sign>
 __device__ void bcNorthReflective(float Q[h+2*gc_y][w+2*gc_x], const int nx_, const int ny_) {
    // Row of this thread inside the tile and in the global grid.
    const int j = threadIdx.y + gc_y;
    const int tj = blockDim.y*blockIdx.y + j;

    // Distance (in cells) from the last interior row of the domain.
    const int dist = (ny_ + gc_y - 1) - tj;

    // Only the gc_y rows nearest the edge have a mirror image to write.
    if (dist >= 0 && dist < gc_y) {
        for (int i=threadIdx.x; i<w+2*gc_x; i+=w) {
            // Interior row `dist` reflects to the ghost row 2*dist+1 above it.
            Q[j + (2*dist + 1)][i] = sign*Q[j][i];
        }
    }
 }
@@ -320,138 +427,6 @@ inline __device__ void writeBlock(float* ptr_, int pitch_,
 // West boundary (reflective).
 //
 // Fills the ghost columns on the low-x side of the tile Q by mirroring the
 // first gc_x interior columns about the domain edge. Every copied value is
 // multiplied by `sign` (presumably +1 to copy and -1 to negate -- confirm
 // against the call sites).
 //
 // Template parameters:
 //   w, h       - tile extents; the row loop below strides by h
 //                (presumably h == blockDim.y -- TODO confirm).
 //   gc_x, gc_y - ghost-cell counts that pad the tile in x and y.
 //   sign       - factor applied to each mirrored value.
 // Parameters:
 //   Q          - tile of (h+2*gc_y) x (w+2*gc_x) floats, updated in place.
 //   nx_, ny_   - not used by this function.
 //
 // The mirror offset is derived from the thread's distance to the edge, so
 // every ghost-cell width gc_x is handled, not only gc_x <= 5.
 template<int w, int h, int gc_x, int gc_y, int sign>
 __device__ void bcWestReflective(float Q[h+2*gc_y][w+2*gc_x], 
                                const int nx_, const int ny_) {
    // Column of this thread inside the tile and in the global grid.
    const int i = threadIdx.x + gc_x;
    const int ti = blockDim.x*blockIdx.x + i;

    // Distance (in cells) from the first interior column of the domain.
    const int dist = ti - gc_x;

    // Only the gc_x columns nearest the edge have a mirror image to write.
    if (dist >= 0 && dist < gc_x) {
        for (int j=threadIdx.y; j<h+2*gc_y; j+=h) {
            // Interior column `dist` reflects to the ghost column 2*dist+1 to its left.
            Q[j][i - (2*dist + 1)] = sign*Q[j][i];
        }
    }
 }
 // East boundary (reflective).
 //
 // Fills the ghost columns on the high-x side of the tile Q by mirroring the
 // last gc_x interior columns about the domain edge. Every copied value is
 // multiplied by `sign` (presumably +1 to copy and -1 to negate -- confirm
 // against the call sites).
 //
 // Template parameters:
 //   w, h       - tile extents; the row loop below strides by h
 //                (presumably h == blockDim.y -- TODO confirm).
 //   gc_x, gc_y - ghost-cell counts that pad the tile in x and y.
 //   sign       - factor applied to each mirrored value.
 // Parameters:
 //   Q          - tile of (h+2*gc_y) x (w+2*gc_x) floats, updated in place.
 //   nx_        - number of interior columns of the global domain; the last
 //                interior column has global index nx_ + gc_x - 1.
 //   ny_        - not used by this function.
 //
 // The mirror offset is derived from the thread's distance to the edge, so
 // every ghost-cell width gc_x is handled, not only gc_x <= 5.
 template<int w, int h, int gc_x, int gc_y, int sign>
 __device__ void bcEastReflective(float Q[h+2*gc_y][w+2*gc_x], 
                                const int nx_, const int ny_) {
    // Column of this thread inside the tile and in the global grid.
    const int i = threadIdx.x + gc_x;
    const int ti = blockDim.x*blockIdx.x + i;

    // Distance (in cells) from the last interior column of the domain.
    const int dist = (nx_ + gc_x - 1) - ti;

    // Only the gc_x columns nearest the edge have a mirror image to write.
    if (dist >= 0 && dist < gc_x) {
        for (int j=threadIdx.y; j<h+2*gc_y; j+=h) {
            // Interior column `dist` reflects to the ghost column 2*dist+1 to its right.
            Q[j][i + (2*dist + 1)] = sign*Q[j][i];
        }
    }
 }
 // South boundary (reflective).
 //
 // Fills the ghost rows on the low-y side of the tile Q by mirroring the
 // first gc_y interior rows about the domain edge. Every copied value is
 // multiplied by `sign` (presumably +1 to copy and -1 to negate -- confirm
 // against the call sites).
 //
 // Template parameters:
 //   w, h       - tile extents; the column loop below strides by w
 //                (presumably w == blockDim.x -- TODO confirm).
 //   gc_x, gc_y - ghost-cell counts that pad the tile in x and y.
 //   sign       - factor applied to each mirrored value.
 // Parameters:
 //   Q          - tile of (h+2*gc_y) x (w+2*gc_x) floats, updated in place.
 //   nx_, ny_   - not used by this function.
 //
 // The mirror offset is derived from the thread's distance to the edge, so
 // every ghost-cell width gc_y is handled, not only gc_y <= 5.
 template<int w, int h, int gc_x, int gc_y, int sign>
 __device__ void bcSouthReflective(float Q[h+2*gc_y][w+2*gc_x], 
                                const int nx_, const int ny_) {
    // Row of this thread inside the tile and in the global grid.
    const int j = threadIdx.y + gc_y;
    const int tj = blockDim.y*blockIdx.y + j;

    // Distance (in cells) from the first interior row of the domain.
    const int dist = tj - gc_y;

    // Only the gc_y rows nearest the edge have a mirror image to write.
    if (dist >= 0 && dist < gc_y) {
        for (int i=threadIdx.x; i<w+2*gc_x; i+=w) {
            // Interior row `dist` reflects to the ghost row 2*dist+1 below it.
            Q[j - (2*dist + 1)][i] = sign*Q[j][i];
        }
    }
 }
 // North boundary (reflective).
 //
 // Fills the ghost rows on the high-y side of the tile Q by mirroring the
 // last gc_y interior rows about the domain edge. Every copied value is
 // multiplied by `sign` (presumably +1 to copy and -1 to negate -- confirm
 // against the call sites).
 //
 // Template parameters:
 //   w, h       - tile extents; the column loop below strides by w
 //                (presumably w == blockDim.x -- TODO confirm).
 //   gc_x, gc_y - ghost-cell counts that pad the tile in x and y.
 //   sign       - factor applied to each mirrored value.
 // Parameters:
 //   Q          - tile of (h+2*gc_y) x (w+2*gc_x) floats, updated in place.
 //   nx_        - not used by this function.
 //   ny_        - number of interior rows of the global domain; the last
 //                interior row has global index ny_ + gc_y - 1.
 //
 // The mirror offset is derived from the thread's distance to the edge, so
 // every ghost-cell width gc_y is handled, not only gc_y <= 5.
 template<int w, int h, int gc_x, int gc_y, int sign>
 __device__ void bcNorthReflective(float Q[h+2*gc_y][w+2*gc_x], const int nx_, const int ny_) {
    // Row of this thread inside the tile and in the global grid.
    const int j = threadIdx.y + gc_y;
    const int tj = blockDim.y*blockIdx.y + j;

    // Distance (in cells) from the last interior row of the domain.
    const int dist = (ny_ + gc_y - 1) - tj;

    // Only the gc_y rows nearest the edge have a mirror image to write.
    if (dist >= 0 && dist < gc_y) {
        for (int i=threadIdx.x; i<w+2*gc_x; i+=w) {
            // Interior row `dist` reflects to the ghost row 2*dist+1 above it.
            Q[j + (2*dist + 1)][i] = sign*Q[j][i];
        }
    }
 }
 template<int w, int h, int gc_x, int gc_y, int vars>