mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-11-29 17:28:03 +01:00
Refactoring CudaArray and ArakawaA grid
This commit is contained in:
@@ -84,7 +84,6 @@ class CudaContext(object):
|
||||
self.cuda_device = cuda.Device(0)
|
||||
self.logger.info("Using '%s' GPU", self.cuda_device.name())
|
||||
self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))
|
||||
self.logger.debug(" => memory: %d MB", self.cuda_device.total_memory() / (1024*1024))
|
||||
|
||||
# Create the CUDA context
|
||||
if (self.blocking):
|
||||
@@ -92,6 +91,9 @@ class CudaContext(object):
|
||||
self.logger.warning("Using blocking context")
|
||||
else:
|
||||
self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)
|
||||
|
||||
free, total = cuda.mem_get_info()
|
||||
self.logger.debug(" => memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
|
||||
|
||||
self.logger.info("Created context handle <%s>", str(self.cuda_context.handle))
|
||||
|
||||
@@ -294,7 +296,7 @@ class CudaArray2D:
|
||||
"""
|
||||
Uploads initial data to the CUDA device
|
||||
"""
|
||||
def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data):
|
||||
def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32):
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.nx = nx
|
||||
self.ny = ny
|
||||
@@ -307,16 +309,18 @@ class CudaArray2D:
|
||||
#self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
|
||||
|
||||
#Make sure data is in proper format
|
||||
assert np.issubdtype(cpu_data.dtype, np.float32), "Wrong datatype: %s" % str(cpu_data.dtype)
|
||||
assert cpu_data.itemsize == 4, "Wrong size of data type"
|
||||
assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"
|
||||
if cpu_data is not None:
|
||||
assert cpu_data.itemsize == 4, "Wrong size of data type"
|
||||
assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"
|
||||
|
||||
#Upload data to the device
|
||||
if (cpu_data.shape == (ny_halo, nx_halo)):
|
||||
if cpu_data is None:
|
||||
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), dtype)
|
||||
elif (cpu_data.shape == (ny_halo, nx_halo)):
|
||||
self.data = pycuda.gpuarray.to_gpu_async(cpu_data, stream=stream)
|
||||
elif (cpu_data.shape == (self.ny, self.nx)):
|
||||
#Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
||||
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), cpu_data.dtype)
|
||||
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), dtype)
|
||||
#self.data.fill(0.0)
|
||||
|
||||
#Create copy object from host to device
|
||||
@@ -337,7 +341,6 @@ class CudaArray2D:
|
||||
#Perform the copy
|
||||
copy(stream)
|
||||
stream.synchronize()
|
||||
|
||||
else:
|
||||
assert False, "Wrong data shape: %s vs %s / %s" % (str(cpu_data.shape), str((self.ny, self.nx)), str((ny_halo, nx_halo)))
|
||||
|
||||
@@ -390,36 +393,31 @@ class CudaArray2D:
|
||||
"""
|
||||
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
||||
"""
|
||||
class SWEDataArakawaA:
|
||||
class ArakawaA2D:
|
||||
"""
|
||||
Uploads initial data to the CUDA device
|
||||
"""
|
||||
def __init__(self, stream, nx, ny, halo_x, halo_y, h0, hu0, hv0):
|
||||
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.h0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
|
||||
self.hu0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
|
||||
self.hv0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
|
||||
|
||||
self.h1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
|
||||
self.hu1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
|
||||
self.hv1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
|
||||
|
||||
"""
|
||||
Swaps the variables after a timestep has been completed
|
||||
"""
|
||||
def swap(self):
|
||||
self.h1, self.h0 = self.h0, self.h1
|
||||
self.hu1, self.hu0 = self.hu0, self.hu1
|
||||
self.hv1, self.hv0 = self.hv0, self.hv1
|
||||
self.gpu_variables = []
|
||||
for cpu_variable in cpu_variables:
|
||||
self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]
|
||||
|
||||
def __getitem__(self, key):
    """
    Returns the GPU variable stored at integer position `key`.

    `key` must be a plain int in the range 0 .. len(self.gpu_variables)-1.
    Negative (wrap-around) indices are not supported.
    Raises IndexError("Out of bounds") for any position outside that range.
    """
    assert type(key) is int, "Indexing is int based"
    # Valid positions are 0 .. len-1, so key == len must be rejected here too
    # (the previous `>` comparison let it fall through to the list lookup).
    if (key < 0 or key >= len(self.gpu_variables)):
        raise IndexError("Out of bounds")
    return self.gpu_variables[key]
|
||||
|
||||
"""
|
||||
Enables downloading data from the CUDA device to Python
|
||||
"""
|
||||
def download(self, stream):
|
||||
h_cpu = self.h0.download(stream, async=True)
|
||||
hu_cpu = self.hu0.download(stream, async=True)
|
||||
hv_cpu = self.hv0.download(stream, async=False)
|
||||
|
||||
return h_cpu, hu_cpu, hv_cpu
|
||||
cpu_variables = []
|
||||
for gpu_variable in self.gpu_variables:
|
||||
cpu_variables += [gpu_variable.download(stream, async=True)]
|
||||
stream.synchronize()
|
||||
return cpu_variables
|
||||
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@@ -60,9 +60,7 @@ class FORCE (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@@ -73,6 +71,16 @@ class FORCE (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "First order centered"
|
||||
|
||||
@@ -84,13 +92,14 @@ class FORCE (Simulator.BaseSimulator):
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
||||
@@ -20,7 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@@ -55,9 +55,7 @@ class HLL (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@@ -68,6 +66,16 @@ class HLL (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Harten-Lax-van Leer"
|
||||
|
||||
@@ -79,13 +87,14 @@ class HLL (Simulator.BaseSimulator):
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
||||
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@@ -59,9 +59,7 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@@ -74,6 +72,16 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Harten-Lax-van Leer (2nd order)"
|
||||
|
||||
@@ -90,13 +98,13 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(0), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
def stepDimsplitYX(self, dt):
|
||||
@@ -106,12 +114,14 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(1), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
||||
@@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@@ -60,9 +60,7 @@ class KP07 (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@@ -75,6 +73,16 @@ class KP07 (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Kurganov-Petrova 2007"
|
||||
|
||||
@@ -88,13 +96,13 @@ class KP07 (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(substep), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def stepEuler(self, dt):
|
||||
self.substepRK(dt, 0)
|
||||
@@ -108,5 +116,4 @@ class KP07 (Simulator.BaseSimulator):
|
||||
self.t += dt
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
||||
return self.u0.download(self.stream)
|
||||
@@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@@ -60,9 +60,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@@ -75,6 +73,16 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Kurganov-Petrova 2007 dimensionally split"
|
||||
|
||||
@@ -91,13 +99,13 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(0), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
def stepDimsplitYX(self, dt):
|
||||
@@ -107,13 +115,14 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(1), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
||||
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@@ -56,9 +56,7 @@ class LxF (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@@ -68,6 +66,16 @@ class LxF (Simulator.BaseSimulator):
|
||||
"iiffffPiPiPiPiPiPi", \
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Lax Friedrichs"
|
||||
@@ -80,13 +88,14 @@ class LxF (Simulator.BaseSimulator):
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
||||
|
||||
@@ -48,9 +48,7 @@ class BaseSimulator:
|
||||
"""
|
||||
def __init__(self, \
|
||||
context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height):
|
||||
@@ -67,14 +65,6 @@ class BaseSimulator:
|
||||
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
#Create data by uploading to device
|
||||
free, total = cuda.mem_get_info()
|
||||
self.logger.debug("GPU memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
|
||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
@@ -94,7 +84,7 @@ class BaseSimulator:
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
)
|
||||
|
||||
"""
|
||||
Function which simulates forward in time using the default simulation type
|
||||
@@ -192,7 +182,7 @@ class BaseSimulator:
|
||||
return self.t
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def synchronize(self):
|
||||
self.stream.synchronize()
|
||||
|
||||
@@ -22,7 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@@ -55,9 +55,7 @@ class WAF (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@@ -68,6 +66,16 @@ class WAF (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Weighted average flux"
|
||||
|
||||
@@ -79,30 +87,33 @@ class WAF (Simulator.BaseSimulator):
|
||||
|
||||
def stepDimsplitXY(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(0), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(0), \
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
def stepDimsplitYX(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(1), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(1), \
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
||||
Reference in New Issue
Block a user