2018-06-14 10:35:01 +02:00

288 lines
8.9 KiB
Python

import pyopencl
import os
import numpy as np
"""
Static function which reads a text file and creates an OpenCL kernel from that
"""
def get_kernel(cl_ctx, kernel_filename, block_width, block_height):
    """
    Reads an OpenCL source file and builds a pyopencl program from it.

    The file is looked up in the directory containing this module, and that
    directory is also passed to the compiler as an include path (-I).
    The source is prefixed with #define lines for block_width and
    block_height, followed by a time-stamped define/undef pair so that the
    source text differs between calls (named to force recompilation).

    cl_ctx: pyopencl context; must hold exactly one device
    kernel_filename: file name of the kernel source, relative to this module's directory
    block_width: value emitted as "#define block_width"
    block_height: value emitted as "#define block_height"

    Returns the object returned by pyopencl.Program(...).build(...)
    """
    import datetime

    #Create define string
    define_string = "#define block_width " + str(block_width) + "\n"
    define_string += "#define block_height " + str(block_height) + "\n\n"
    define_string += "#ifndef my_variable_to_force_recompilation\n"
    define_string += "#define my_variable_to_force_recompilation " + datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") + "\n"
    define_string += "#undef my_variable_to_force_recompilation \n"
    define_string += "#endif\n\n"

    def shellquote(s):
        """
        Quotes an include path in the way the platform's compiler accepts
        """
        assert(cl_ctx.num_devices == 1)
        platform_name = cl_ctx.devices[0].get_info(pyopencl.device_info.PLATFORM).name
        platform_name = platform_name.upper()
        if ('INTEL' in platform_name):
            #Intel CL compiler doesn't like spaces in include paths. We have to escape them
            return '"' + s.replace(" ", "\\ ") + '"'
        elif ('NVIDIA' in platform_name):
            #NVIDIA doesn't like double quoted paths...
            return "'" + s + "'"
        else:
            #Previously this fell through and returned None, which then ended
            #up in the build options. Pass the path through unchanged instead.
            #NOTE(review): paths containing spaces may still need
            #platform-specific quoting here - confirm on the target platform
            return s

    module_path = os.path.dirname(os.path.realpath(__file__))
    module_path_escaped = shellquote(module_path)
    options = ['-I', module_path_escaped]

    #Read the proper program
    fullpath = os.path.join(module_path, kernel_filename)
    with open(fullpath, "r") as kernel_file:
        kernel_string = define_string + kernel_file.read()
        kernel = pyopencl.Program(cl_ctx, kernel_string).build(options)

    return kernel
"""
Class that holds data
"""
class OpenCLArray2D:
    """
    Class that holds a 2D float32 array (interior plus halo cells) in an
    OpenCL buffer
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, data):
        """
        Uploads initial data to the CL device

        cl_ctx: pyopencl context the buffer is created in
        nx, ny: number of interior cells along x and y
        halo_x, halo_y: number of halo cells added on each side along x and y
        data: numpy array of shape (ny + 2*halo_y, nx + 2*halo_x)

        Raises AssertionError if the shape of data does not match
        """
        host_data = self.convert_to_float32(data)

        self.nx = nx
        self.ny = ny
        self.nx_halo = nx + 2*halo_x
        self.ny_halo = ny + 2*halo_y
        assert(host_data.shape[1] == self.nx_halo)
        assert(host_data.shape[0] == self.ny_halo)

        assert(data.shape == (self.ny_halo, self.nx_halo))

        #Upload data to the device
        mf = pyopencl.mem_flags
        self.data = pyopencl.Buffer(cl_ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=host_data)

        self.bytes_per_float = host_data.itemsize
        assert(self.bytes_per_float == 4)
        #Size of one row (including halo) in bytes
        self.pitch = np.int32((self.nx_halo)*self.bytes_per_float)

    def download(self, cl_queue):
        """
        Enables downloading data from CL device to Python

        cl_queue: pyopencl command queue used for the copy

        Returns a new float32 numpy array of shape (ny_halo, nx_halo)
        """
        #Allocate data on the host for result
        host_data = np.empty((self.ny_halo, self.nx_halo), dtype=np.float32, order='C')

        #Copy data from device to host
        pyopencl.enqueue_copy(cl_queue, host_data, self.data)

        #Return
        return host_data

    @staticmethod
    def convert_to_float32(data):
        """
        Converts to C-style float 32 array suitable for the GPU/OpenCL

        data: numpy array

        Returns data itself if it already is a C-contiguous float32 array,
        otherwise a converted C-contiguous float32 copy
        """
        #Check C-contiguity directly: np.isfortran only detects Fortran-ordered
        #arrays, so strided views (neither C- nor F-contiguous) used to slip
        #through unconverted
        if (not np.issubdtype(data.dtype, np.float32) or not data.flags['C_CONTIGUOUS']):
            #Call form prints the same text on both Python 2 and Python 3
            print("Converting H0")
            return data.astype(np.float32, order='C')
        else:
            return data
"""
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
"""
class SWEDataArkawaA:
    """
    A class representing an Arakawa A type (unstaggered, logically Cartesian) grid

    Holds two sets of device arrays (h0, hu0, hv0) and (h1, hu1, hv1), all
    with the same dimensions.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """
        Uploads initial data to the CL device

        cl_ctx: pyopencl context
        nx, ny: number of interior cells along x and y
        halo_x, halo_y: number of halo cells on each side along x and y
        h0, hu0, hv0: initial data; each is uploaded twice, once into the
                      "0" array and once into the "1" array
        """
        self.h0 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0)
        self.hu0 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, hu0)
        self.hv0 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, hv0)

        self.h1 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0)
        self.hu1 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, hu0)
        self.hv1 = OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, hv0)

    def swap(self):
        """
        Swaps the variables after a timestep has been completed
        """
        self.h1, self.h0 = self.h0, self.h1
        self.hu1, self.hu0 = self.hu0, self.hu1
        self.hv1, self.hv0 = self.hv0, self.hv1

    def download(self, cl_queue):
        """
        Enables downloading data from CL device to Python

        cl_queue: pyopencl command queue used for the copies

        Returns the tuple (h, hu, hv) downloaded from h0, hu0 and hv0
        """
        h_cpu = self.h0.download(cl_queue)
        hu_cpu = self.hu0.download(cl_queue)
        hv_cpu = self.hv0.download(cl_queue)

        return h_cpu, hu_cpu, hv_cpu
"""
A class representing an Arakawa C type (staggered, u fluxes on east/west faces, v fluxes on north/south faces) grid
We use h as cell centers
"""
class SWEDataArkawaC:
    """
    Staggered grid data: h uses (nx, ny) with the given halos, hu uses
    (nx+1, ny) with no halo along x, and hv uses (nx, ny+1) with no halo
    along y. Two copies ("0" and "1") of each array are kept on the device.
    """

    def __init__(self, cl_ctx, nx, ny, halo_x, halo_y, h0, hu0, hv0):
        """
        Uploads initial data to the CL device

        cl_ctx: pyopencl context
        nx, ny: number of cells along x and y for h
        halo_x, halo_y: halo cells per side; at most 1 each (asserted)
        h0, hu0, hv0: initial data, uploaded into both the "0" and "1" arrays
        """
        #FIXME: This at least works for 0 and 1 ghost cells, but not convinced it generalizes
        assert(halo_x <= 1 and halo_y <= 1)

        #Same three uploads for the "0" set first, then for the "1" set
        for suffix in ("0", "1"):
            setattr(self, "h" + suffix, OpenCLArray2D(cl_ctx, nx, ny, halo_x, halo_y, h0))
            setattr(self, "hu" + suffix, OpenCLArray2D(cl_ctx, nx+1, ny, 0, halo_y, hu0))
            setattr(self, "hv" + suffix, OpenCLArray2D(cl_ctx, nx, ny+1, halo_x, 0, hv0))

    def swap(self):
        """
        Swaps the variables after a timestep has been completed
        """
        #NOTE(review): the previous comment here said h is assumed to be
        #constant (bottom topography), yet h is swapped like hu and hv - confirm
        for name in ("h", "hu", "hv"):
            old_name = name + "0"
            new_name = name + "1"
            old_value = getattr(self, old_name)
            new_value = getattr(self, new_name)
            setattr(self, new_name, old_value)
            setattr(self, old_name, new_value)

    def download(self, cl_queue):
        """
        Enables downloading data from CL device to Python

        Returns the tuple (h, hu, hv) downloaded from h0, hu0 and hv0
        """
        sources = (self.h0, self.hu0, self.hv0)
        return tuple([array.download(cl_queue) for array in sources])
"""
Class which represents different wind stresses
"""
class WindStressParams:
    """
    Container for wind stress parameters, stored as numpy scalars
    (type as int32, everything else as float32).

    type: Type of wind stress, 0=Uniform along shore, 1=bell shaped along shore, 2=moving cyclone
    tau0: Amplitude of wind stress (Pa)
    rho: Density of sea water (1025.0 kg / m^3)
    alpha: Offshore e-folding length (1/(10*dx) = 5e-6 m^-1)
    xm: Maximum wind stress for bell shaped wind stress
    Rc: Distance to max wind stress from center of cyclone (10dx = 200 000 m)
    x0: Initial x position of moving cyclone (dx*(nx/2) - u0*3600.0*48.0)
    y0: Initial y position of moving cyclone (dy*(ny/2) - v0*3600.0*48.0)
    u0: Translation speed along x for moving cyclone (30.0/sqrt(5.0))
    v0: Translation speed along y for moving cyclone (-0.5*u0)
    """

    def __init__(self,
                 type=99,  # "no wind"
                 tau0=0, rho=0, alpha=0, xm=0, Rc=0,
                 x0=0, y0=0,
                 u0=0, v0=0):
        #The wind type is the only integer parameter
        self.type = np.int32(type)

        #All remaining parameters are stored as single precision floats
        float_params = (
            ("tau0", tau0),
            ("rho", rho),
            ("alpha", alpha),
            ("xm", xm),
            ("Rc", Rc),
            ("x0", x0),
            ("y0", y0),
            ("u0", u0),
            ("v0", v0),
        )
        for attr_name, value in float_params:
            setattr(self, attr_name, np.float32(value))