EulerTesting.ipynb now runs on Linux. Simulation results need further validation.

This commit is contained in:
Martin Lilleeng Sætra 2019-06-05 15:30:35 +00:00
parent 4fa14abdff
commit 4ca9581b37
No known key found for this signature in database
GPG Key ID: 5282F74D778CB274
4 changed files with 546093 additions and 497928 deletions

File diff suppressed because one or more lines are too long

View File

@ -510,7 +510,7 @@ class CudaArray2D:
"""
Enables downloading data from GPU to Python
"""
def download(self, stream, cpu_data=None, async=False, extent=None):
def download(self, stream, cpu_data=None, asynch=False, extent=None):
if (extent is None):
x = self.x_halo
y = self.y_halo
@ -548,7 +548,7 @@ class CudaArray2D:
copy.height = int(ny)
copy(stream)
if async==False:
if asynch==False:
stream.synchronize()
return cpu_data
@ -667,7 +667,7 @@ class CudaArray3D:
"""
Enables downloading data from GPU to Python
"""
def download(self, stream, async=False):
def download(self, stream, asynch=False):
#self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
#Allocate host memory
#cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
@ -692,7 +692,7 @@ class CudaArray3D:
copy.depth = self.nz
copy(stream)
if async==False:
if asynch==False:
stream.synchronize()
return cpu_data
@ -734,7 +734,7 @@ class ArakawaA2D:
cpu_variables = []
for i in variables:
assert i < len(self.gpu_variables), "Variable {:d} is out of range".format(i)
cpu_variables += [self.gpu_variables[i].download(stream, async=True)]
cpu_variables += [self.gpu_variables[i].download(stream, asynch=True)]
stream.synchronize()
return cpu_variables

View File

@ -105,8 +105,8 @@ class EE2D_KP07_dimsplit (BaseSimulator):
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
dt_x = np.min(self.dx / (np.abs(rho_u/rho) + np.sqrt(gamma*rho)))
dt_y = np.min(self.dy / (np.abs(rho_v/rho) + np.sqrt(gamma*rho)))
dt = min(dt_x, dt_y)
self.cfl_data.fill(dt, stream=self.stream)
self.dt = min(dt_x, dt_y)
self.cfl_data.fill(self.dt, stream=self.stream)
def substep(self, dt, step_number):

View File

@ -130,6 +130,113 @@ inline __device__ BoundaryCondition getBCWest(int bc_) {
}
/**
 * West boundary, reflective variant.
 *
 * A thread whose global column index
 * (blockDim.x*blockIdx.x + threadIdx.x + gc_x) equals gc_x + k, for
 * 0 <= k < min(gc_x, 5), copies sign*Q[row][col] into the entry 2*k + 1
 * columns to its left. Every other thread leaves Q untouched.
 *
 * Rows are visited starting at threadIdx.y in steps of h until the full
 * tile height h + 2*gc_y has been covered.
 *
 * No barrier is issued here.
 * NOTE(review): gc_x / gc_y presumably are the ghost cell widths in x / y,
 * and sign presumably is +1 or -1 -- confirm against the callers.
 *
 * @param Q   Tile of (h + 2*gc_y) x (w + 2*gc_x) floats, updated in place
 * @param nx_ Not used by this function
 * @param ny_ Not used by this function
 */
template<int w, int h, int gc_x, int gc_y, int sign>
__device__ void bcWestReflective(float Q[h+2*gc_y][w+2*gc_x],
        const int nx_, const int ny_) {
    // Tile-local column handled by this thread and its global counterpart
    const int col = threadIdx.x + gc_x;
    const int global_col = blockDim.x*blockIdx.x + col;

    // Number of columns between this thread and global column gc_x
    const int depth = global_col - gc_x;

    // At most five columns are mirrored, and never more than gc_x of them
    const bool mirrors = (depth >= 0) && (depth < gc_x) && (depth < 5);

    for (int row = threadIdx.y; row < h + 2*gc_y; row += h) {
        if (mirrors) {
            Q[row][col - (2*depth + 1)] = sign*Q[row][col];
        }
    }
}
/**
 * East boundary, reflective variant.
 *
 * A thread whose global column index
 * (blockDim.x*blockIdx.x + threadIdx.x + gc_x) equals nx_ + gc_x - 1 - k,
 * for 0 <= k < min(gc_x, 5), copies sign*Q[row][col] into the entry
 * 2*k + 1 columns to its right. Every other thread leaves Q untouched.
 *
 * Rows are visited starting at threadIdx.y in steps of h until the full
 * tile height h + 2*gc_y has been covered.
 *
 * No barrier is issued here.
 * NOTE(review): gc_x / gc_y presumably are the ghost cell widths in x / y,
 * nx_ presumably is the interior domain width, and sign presumably is
 * +1 or -1 -- confirm against the callers.
 *
 * @param Q   Tile of (h + 2*gc_y) x (w + 2*gc_x) floats, updated in place
 * @param nx_ Used to locate the global columns that get mirrored
 * @param ny_ Not used by this function
 */
template<int w, int h, int gc_x, int gc_y, int sign>
__device__ void bcEastReflective(float Q[h+2*gc_y][w+2*gc_x],
        const int nx_, const int ny_) {
    // Tile-local column handled by this thread and its global counterpart
    const int col = threadIdx.x + gc_x;
    const int global_col = blockDim.x*blockIdx.x + col;

    // Number of columns between this thread and global column nx_ + gc_x - 1
    const int depth = (nx_ + gc_x - 1) - global_col;

    // At most five columns are mirrored, and never more than gc_x of them
    const bool mirrors = (depth >= 0) && (depth < gc_x) && (depth < 5);

    for (int row = threadIdx.y; row < h + 2*gc_y; row += h) {
        if (mirrors) {
            Q[row][col + (2*depth + 1)] = sign*Q[row][col];
        }
    }
}
/**
 * South boundary, reflective variant.
 *
 * A thread whose global row index
 * (blockDim.y*blockIdx.y + threadIdx.y + gc_y) equals gc_y + k, for
 * 0 <= k < min(gc_y, 5), copies sign*Q[row][col] into the entry 2*k + 1
 * rows before it (smaller row index). Every other thread leaves Q untouched.
 *
 * Columns are visited starting at threadIdx.x in steps of w until the full
 * tile width w + 2*gc_x has been covered.
 *
 * No barrier is issued here.
 * NOTE(review): gc_x / gc_y presumably are the ghost cell widths in x / y,
 * and sign presumably is +1 or -1 -- confirm against the callers.
 *
 * @param Q   Tile of (h + 2*gc_y) x (w + 2*gc_x) floats, updated in place
 * @param nx_ Not used by this function
 * @param ny_ Not used by this function
 */
template<int w, int h, int gc_x, int gc_y, int sign>
__device__ void bcSouthReflective(float Q[h+2*gc_y][w+2*gc_x],
        const int nx_, const int ny_) {
    // Tile-local row handled by this thread and its global counterpart
    const int row = threadIdx.y + gc_y;
    const int global_row = blockDim.y*blockIdx.y + row;

    // Number of rows between this thread and global row gc_y
    const int depth = global_row - gc_y;

    // At most five rows are mirrored, and never more than gc_y of them
    const bool mirrors = (depth >= 0) && (depth < gc_y) && (depth < 5);

    for (int col = threadIdx.x; col < w + 2*gc_x; col += w) {
        if (mirrors) {
            Q[row - (2*depth + 1)][col] = sign*Q[row][col];
        }
    }
}
/**
 * North boundary, reflective variant.
 *
 * A thread whose global row index
 * (blockDim.y*blockIdx.y + threadIdx.y + gc_y) equals ny_ + gc_y - 1 - k,
 * for 0 <= k < min(gc_y, 5), copies sign*Q[row][col] into the entry
 * 2*k + 1 rows after it (larger row index). Every other thread leaves Q
 * untouched.
 *
 * Columns are visited starting at threadIdx.x in steps of w until the full
 * tile width w + 2*gc_x has been covered.
 *
 * No barrier is issued here.
 * NOTE(review): gc_x / gc_y presumably are the ghost cell widths in x / y,
 * ny_ presumably is the interior domain height, and sign presumably is
 * +1 or -1 -- confirm against the callers.
 *
 * @param Q   Tile of (h + 2*gc_y) x (w + 2*gc_x) floats, updated in place
 * @param nx_ Not used by this function
 * @param ny_ Used to locate the global rows that get mirrored
 */
template<int w, int h, int gc_x, int gc_y, int sign>
__device__ void bcNorthReflective(float Q[h+2*gc_y][w+2*gc_x], const int nx_, const int ny_) {
    // Tile-local row handled by this thread and its global counterpart
    const int row = threadIdx.y + gc_y;
    const int global_row = blockDim.y*blockIdx.y + row;

    // Number of rows between this thread and global row ny_ + gc_y - 1
    const int depth = (ny_ + gc_y - 1) - global_row;

    // At most five rows are mirrored, and never more than gc_y of them
    const bool mirrors = (depth >= 0) && (depth < gc_y) && (depth < 5);

    for (int col = threadIdx.x; col < w + 2*gc_x; col += w) {
        if (mirrors) {
            Q[row + (2*depth + 1)][col] = sign*Q[row][col];
        }
    }
}
@ -320,138 +427,6 @@ inline __device__ void writeBlock(float* ptr_, int pitch_,
/**
 * West boundary, reflective variant.
 *
 * A thread whose global column index
 * (blockDim.x*blockIdx.x + threadIdx.x + gc_x) equals gc_x + k, for
 * 0 <= k < min(gc_x, 5), copies sign*Q[row][col] into the entry 2*k + 1
 * columns to its left. Every other thread leaves Q untouched.
 *
 * Rows are visited starting at threadIdx.y in steps of h until the full
 * tile height h + 2*gc_y has been covered.
 *
 * No barrier is issued here.
 * NOTE(review): gc_x / gc_y presumably are the ghost cell widths in x / y,
 * and sign presumably is +1 or -1 -- confirm against the callers.
 *
 * @param Q   Tile of (h + 2*gc_y) x (w + 2*gc_x) floats, updated in place
 * @param nx_ Not used by this function
 * @param ny_ Not used by this function
 */
template<int w, int h, int gc_x, int gc_y, int sign>
__device__ void bcWestReflective(float Q[h+2*gc_y][w+2*gc_x],
        const int nx_, const int ny_) {
    // Tile-local column handled by this thread and its global counterpart
    const int col = threadIdx.x + gc_x;
    const int global_col = blockDim.x*blockIdx.x + col;

    // Number of columns between this thread and global column gc_x
    const int depth = global_col - gc_x;

    // At most five columns are mirrored, and never more than gc_x of them
    const bool mirrors = (depth >= 0) && (depth < gc_x) && (depth < 5);

    for (int row = threadIdx.y; row < h + 2*gc_y; row += h) {
        if (mirrors) {
            Q[row][col - (2*depth + 1)] = sign*Q[row][col];
        }
    }
}
/**
 * East boundary, reflective variant.
 *
 * A thread whose global column index
 * (blockDim.x*blockIdx.x + threadIdx.x + gc_x) equals nx_ + gc_x - 1 - k,
 * for 0 <= k < min(gc_x, 5), copies sign*Q[row][col] into the entry
 * 2*k + 1 columns to its right. Every other thread leaves Q untouched.
 *
 * Rows are visited starting at threadIdx.y in steps of h until the full
 * tile height h + 2*gc_y has been covered.
 *
 * No barrier is issued here.
 * NOTE(review): gc_x / gc_y presumably are the ghost cell widths in x / y,
 * nx_ presumably is the interior domain width, and sign presumably is
 * +1 or -1 -- confirm against the callers.
 *
 * @param Q   Tile of (h + 2*gc_y) x (w + 2*gc_x) floats, updated in place
 * @param nx_ Used to locate the global columns that get mirrored
 * @param ny_ Not used by this function
 */
template<int w, int h, int gc_x, int gc_y, int sign>
__device__ void bcEastReflective(float Q[h+2*gc_y][w+2*gc_x],
        const int nx_, const int ny_) {
    // Tile-local column handled by this thread and its global counterpart
    const int col = threadIdx.x + gc_x;
    const int global_col = blockDim.x*blockIdx.x + col;

    // Number of columns between this thread and global column nx_ + gc_x - 1
    const int depth = (nx_ + gc_x - 1) - global_col;

    // At most five columns are mirrored, and never more than gc_x of them
    const bool mirrors = (depth >= 0) && (depth < gc_x) && (depth < 5);

    for (int row = threadIdx.y; row < h + 2*gc_y; row += h) {
        if (mirrors) {
            Q[row][col + (2*depth + 1)] = sign*Q[row][col];
        }
    }
}
/**
 * South boundary, reflective variant.
 *
 * A thread whose global row index
 * (blockDim.y*blockIdx.y + threadIdx.y + gc_y) equals gc_y + k, for
 * 0 <= k < min(gc_y, 5), copies sign*Q[row][col] into the entry 2*k + 1
 * rows before it (smaller row index). Every other thread leaves Q untouched.
 *
 * Columns are visited starting at threadIdx.x in steps of w until the full
 * tile width w + 2*gc_x has been covered.
 *
 * No barrier is issued here.
 * NOTE(review): gc_x / gc_y presumably are the ghost cell widths in x / y,
 * and sign presumably is +1 or -1 -- confirm against the callers.
 *
 * @param Q   Tile of (h + 2*gc_y) x (w + 2*gc_x) floats, updated in place
 * @param nx_ Not used by this function
 * @param ny_ Not used by this function
 */
template<int w, int h, int gc_x, int gc_y, int sign>
__device__ void bcSouthReflective(float Q[h+2*gc_y][w+2*gc_x],
        const int nx_, const int ny_) {
    // Tile-local row handled by this thread and its global counterpart
    const int row = threadIdx.y + gc_y;
    const int global_row = blockDim.y*blockIdx.y + row;

    // Number of rows between this thread and global row gc_y
    const int depth = global_row - gc_y;

    // At most five rows are mirrored, and never more than gc_y of them
    const bool mirrors = (depth >= 0) && (depth < gc_y) && (depth < 5);

    for (int col = threadIdx.x; col < w + 2*gc_x; col += w) {
        if (mirrors) {
            Q[row - (2*depth + 1)][col] = sign*Q[row][col];
        }
    }
}
/**
 * North boundary, reflective variant.
 *
 * A thread whose global row index
 * (blockDim.y*blockIdx.y + threadIdx.y + gc_y) equals ny_ + gc_y - 1 - k,
 * for 0 <= k < min(gc_y, 5), copies sign*Q[row][col] into the entry
 * 2*k + 1 rows after it (larger row index). Every other thread leaves Q
 * untouched.
 *
 * Columns are visited starting at threadIdx.x in steps of w until the full
 * tile width w + 2*gc_x has been covered.
 *
 * No barrier is issued here.
 * NOTE(review): gc_x / gc_y presumably are the ghost cell widths in x / y,
 * ny_ presumably is the interior domain height, and sign presumably is
 * +1 or -1 -- confirm against the callers.
 *
 * @param Q   Tile of (h + 2*gc_y) x (w + 2*gc_x) floats, updated in place
 * @param nx_ Not used by this function
 * @param ny_ Used to locate the global rows that get mirrored
 */
template<int w, int h, int gc_x, int gc_y, int sign>
__device__ void bcNorthReflective(float Q[h+2*gc_y][w+2*gc_x], const int nx_, const int ny_) {
    // Tile-local row handled by this thread and its global counterpart
    const int row = threadIdx.y + gc_y;
    const int global_row = blockDim.y*blockIdx.y + row;

    // Number of rows between this thread and global row ny_ + gc_y - 1
    const int depth = (ny_ + gc_y - 1) - global_row;

    // At most five rows are mirrored, and never more than gc_y of them
    const bool mirrors = (depth >= 0) && (depth < gc_y) && (depth < 5);

    for (int col = threadIdx.x; col < w + 2*gc_x; col += w) {
        if (mirrors) {
            Q[row + (2*depth + 1)][col] = sign*Q[row][col];
        }
    }
}
template<int w, int h, int gc_x, int gc_y, int vars>