mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
288 lines
8.9 KiB
Python
288 lines
8.9 KiB
Python
import pyopencl
|
|
import os
|
|
import numpy as np
|
|
|
|
"""
|
|
Static function which reads a text file and creates an OpenCL kernel from that
|
|
"""
|
|
def get_kernel(cl_ctx, kernel_filename, block_width, block_height):
    """Read an OpenCL source file and build it into a program.

    The file is looked up relative to the directory containing this module.
    Its text is prefixed with ``#define`` lines for ``block_width`` and
    ``block_height`` plus a timestamped define/undef pair, and the directory
    of this module is passed to the compiler as an include path.

    Parameters:
        cl_ctx: PyOpenCL context; must contain exactly one device.
        kernel_filename: Kernel source file name, relative to this module.
        block_width: Value substituted for the ``block_width`` macro.
        block_height: Value substituted for the ``block_height`` macro.

    Returns:
        The result of ``pyopencl.Program(...).build(...)``.

    Raises:
        AssertionError: if the context does not hold exactly one device.
    """
    import datetime

    # The timestamp makes the source text differ from one second to the next.
    # Judging by the macro name this is meant to defeat cached builds.
    build_stamp = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S")

    # Create define string
    define_string = "".join([
        "#define block_width " + str(block_width) + "\n",
        "#define block_height " + str(block_height) + "\n\n",
        "#ifndef my_variable_to_force_recompilation\n",
        "#define my_variable_to_force_recompilation " + build_stamp + "\n",
        "#undef my_variable_to_force_recompilation \n",
        "#endif\n\n",
    ])

    def shellquote(s):
        """Quote an include path the way the platform's CL compiler expects."""
        assert(cl_ctx.num_devices == 1)
        platform_name = cl_ctx.devices[0].get_info(pyopencl.device_info.PLATFORM).name
        platform_name = platform_name.upper()
        if ('INTEL' in platform_name):
            # Intel CL compiler doesn't like spaces in include paths. We have to escape them
            return '"' + s.replace(" ", "\\ ") + '"'
        if ('NVIDIA' in platform_name):
            # NVIDIA doesn't like double quoted paths...
            return "'" + s + "'"
        # Any other platform previously fell through and returned None, which
        # put None into the build options. Fall back to plain double quotes.
        # NOTE(review): this fallback is untested on non-Intel/NVIDIA platforms.
        return '"' + s + '"'

    module_path = os.path.dirname(os.path.realpath(__file__))
    module_path_escaped = shellquote(module_path)
    options = ['-I', module_path_escaped]

    # Read the proper program
    fullpath = os.path.join(module_path, kernel_filename)
    with open(fullpath, "r") as kernel_file:
        kernel_string = define_string + kernel_file.read()

    kernel = pyopencl.Program(cl_ctx, kernel_string).build(options)

    return kernel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
Class that holds data
|
|
"""
|
|
class OpenCLArray2D:
    """A 2D float32 array stored in an OpenCL device buffer.

    Attributes set by the constructor:
        nx, ny: Interior size as passed in.
        nx_halo, ny_halo: Size including ``halo_x`` / ``halo_y`` cells on
            both sides of each axis.
        data: The ``pyopencl.Buffer`` holding the array.
        bytes_per_float: Item size of the uploaded array (always 4).
        pitch: Row length in bytes (``nx_halo * bytes_per_float``) as
            ``np.int32``.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, data):
        """Upload initial data to the CL device.

        Parameters:
            cl_ctx: PyOpenCL context the buffer is created in.
            nx, ny: Interior size along x and y.
            halo_x, halo_y: Number of halo cells on each side along x and y.
            data: NumPy array of shape ``(ny + 2*halo_y, nx + 2*halo_x)``.

        Raises:
            AssertionError: if ``data`` does not have the expected shape.
        """
        host_data = self.convert_to_float32(data)

        self.nx = nx
        self.ny = ny
        self.nx_halo = nx + 2*halo_x
        self.ny_halo = ny + 2*halo_y
        expected_shape = (self.ny_halo, self.nx_halo)
        assert host_data.shape == expected_shape, \
            "data has shape %s, expected %s" % (host_data.shape, expected_shape)

        # Upload data to the device
        mf = pyopencl.mem_flags
        self.data = pyopencl.Buffer(cl_ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=host_data)

        self.bytes_per_float = host_data.itemsize
        assert(self.bytes_per_float == 4)
        self.pitch = np.int32((self.nx_halo)*self.bytes_per_float)

    def download(self, cl_queue):
        """Copy the device buffer into a new host array and return it.

        Parameters:
            cl_queue: PyOpenCL command queue used for the copy.

        Returns:
            A C-ordered float32 array of shape ``(ny_halo, nx_halo)``.
        """
        # Allocate data on the host for result
        host_data = np.empty((self.ny_halo, self.nx_halo), dtype=np.float32, order='C')

        # Copy data from device to host
        pyopencl.enqueue_copy(cl_queue, host_data, self.data)

        return host_data

    @staticmethod
    def convert_to_float32(data):
        """Return ``data`` as a C-contiguous float32 array.

        The input is returned unchanged when it already is float32 and
        C-contiguous; otherwise a converted copy is returned and a message
        is printed.
        """
        is_float32 = np.issubdtype(data.dtype, np.float32)
        # Checking C-contiguity also catches strided views, not only
        # Fortran-ordered arrays, so the host buffer is always contiguous.
        if is_float32 and data.flags['C_CONTIGUOUS']:
            return data

        # Call form prints the same text on Python 2 and Python 3.
        print("Converting H0")
        return data.astype(np.float32, order='C')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
|
"""
|
|
class SWEDataArkawaA:
    """Device storage for h, hu and hv on an unstaggered grid.

    Each variable is kept in two OpenCLArray2D buffers (suffix 0 and 1),
    both initialised from the same host array.

    NOTE(review): an identical definition of this class appears again later
    in this file; the later one is the definition that stays bound.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """Upload the initial h, hu and hv fields to the CL device."""
        def upload(field):
            # All three variables share the same grid size and halo.
            return OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, field)

        self.h0 = upload(h0)
        self.hu0 = upload(hu0)
        self.hv0 = upload(hv0)

        self.h1 = upload(h0)
        self.hu1 = upload(hu0)
        self.hv1 = upload(hv0)

    def swap(self):
        """Exchange the 0 and 1 buffers after a timestep has been completed."""
        self.h0, self.h1 = self.h1, self.h0
        self.hu0, self.hu1 = self.hu1, self.hu0
        self.hv0, self.hv1 = self.hv1, self.hv0

    def download(self, cl_queue):
        """Download the h0, hu0 and hv0 buffers and return them as a tuple."""
        h_host = self.h0.download(cl_queue)
        hu_host = self.hu0.download(cl_queue)
        hv_host = self.hv0.download(cl_queue)
        return h_host, hu_host, hv_host
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
|
"""
|
|
class SWEDataArkawaA:
    """Device storage for h, hu and hv on an unstaggered grid.

    Every variable has a "0" and a "1" OpenCLArray2D buffer, both filled
    from the same initial host array.

    NOTE(review): this repeats an identical class definition found earlier
    in this file; being the later one, it is the definition that stays bound.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """Upload the initial h, hu and hv fields to the CL device."""
        grid_args = (cl_ctx, nx, ny, halo_x, halo_y)

        self.h0 = OpenCLArray2D(*(grid_args + (h0,)))
        self.hu0 = OpenCLArray2D(*(grid_args + (hu0,)))
        self.hv0 = OpenCLArray2D(*(grid_args + (hv0,)))

        self.h1 = OpenCLArray2D(*(grid_args + (h0,)))
        self.hu1 = OpenCLArray2D(*(grid_args + (hu0,)))
        self.hv1 = OpenCLArray2D(*(grid_args + (hv0,)))

    def swap(self):
        """Exchange the 0 and 1 buffers after a timestep has been completed."""
        self.h0, self.h1 = self.h1, self.h0
        self.hu0, self.hu1 = self.hu1, self.hu0
        self.hv0, self.hv1 = self.hv1, self.hv0

    def download(self, cl_queue):
        """Download the h0, hu0 and hv0 buffers and return them as a tuple."""
        h_host = self.h0.download(cl_queue)
        hu_host = self.hu0.download(cl_queue)
        hv_host = self.hv0.download(cl_queue)
        return h_host, hu_host, hv_host
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
A class representing an Arakawa C type (staggered, u fluxes on east/west faces, v fluxes on north/south faces) grid
|
|
We use h as cell centers
|
|
"""
|
|
class SWEDataArkawaC:
    """Device storage for h, hu and hv on a staggered grid.

    h is uploaded with size (nx, ny) and the full halo; hu with size
    (nx+1, ny) and no halo along x; hv with size (nx, ny+1) and no halo
    along y. Each variable has a "0" and a "1" OpenCLArray2D buffer filled
    from the same initial host array.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """Upload the initial h, hu and hv fields to the CL device.

        Raises AssertionError when either halo is larger than one cell.
        """
        # FIXME: This at least works for 0 and 1 ghost cells, but not convinced it generalizes
        assert(halo_x <= 1 and halo_y <= 1)

        def upload_h():
            return OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0)

        def upload_hu():
            return OpenCLArray2D(cl_ctx, nx+1, ny, 0, halo_y, hu0)

        def upload_hv():
            return OpenCLArray2D(cl_ctx, nx, ny+1, halo_x, 0, hv0)

        self.h0 = upload_h()
        self.hu0 = upload_hu()
        self.hv0 = upload_hv()

        self.h1 = upload_h()
        self.hu1 = upload_hu()
        self.hv1 = upload_hv()

    def swap(self):
        """Exchange the 0 and 1 buffers after a timestep has been completed."""
        # h is assumed to be constant (bottom topography really)
        # NOTE(review): despite the remark above, h0/h1 are swapped as well;
        # confirm which behaviour is intended.
        self.h0, self.h1 = self.h1, self.h0
        self.hu0, self.hu1 = self.hu1, self.hu0
        self.hv0, self.hv1 = self.hv1, self.hv0

    def download(self, cl_queue):
        """Download the h0, hu0 and hv0 buffers and return them as a tuple."""
        h_host = self.h0.download(cl_queue)
        hu_host = self.hu0.download(cl_queue)
        hv_host = self.hv0.download(cl_queue)
        return h_host, hu_host, hv_host
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
Class which represents different wind stresses
|
|
"""
|
|
class WindStressParams:
    """Parameters describing a wind stress.

    ``type`` is stored as ``np.int32``; every other parameter is stored as
    ``np.float32`` under its own name.

    type:  Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
           (the default 99 is marked "no wind")
    tau0:  Amplitude of wind stress (Pa)
    rho:   Density of sea water (1025.0 kg / m^3)
    alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
    xm:    Maximum wind stress for bell shaped wind stress
    Rc:    Distance to max wind stress from center of cyclone (10dx = 200 000 m)
    x0:    Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
    y0:    Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
    u0:    Translation speed along x for moving cyclone (30.0/sqrt(5.0))
    v0:    Translation speed along y for moving cyclone (-0.5*u0)
    """

    def __init__(self,
                 type=99,  # "no wind"
                 tau0=0, rho=0, alpha=0, xm=0, Rc=0,
                 x0=0, y0=0,
                 u0=0, v0=0):
        self.type = np.int32(type)

        # Everything except the type selector is a single-precision float.
        float_params = (
            ("tau0", tau0),
            ("rho", rho),
            ("alpha", alpha),
            ("xm", xm),
            ("Rc", Rc),
            ("x0", x0),
            ("y0", y0),
            ("u0", u0),
            ("v0", v0),
        )
        for attr_name, value in float_params:
            setattr(self, attr_name, np.float32(value))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|