Euler appears to work now

This commit is contained in:
André R. Brodtkorb 2018-11-05 16:46:37 +01:00
parent 0671bd747a
commit e38885d39b
13 changed files with 702 additions and 19 deletions

519
EulerTesting.ipynb Normal file
View File

@ -0,0 +1,519 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Lets have matplotlib \"inline\"\n",
"%matplotlib inline\n",
"\n",
"# Add line profiler\n",
"%load_ext line_profiler\n",
"\n",
"#Import packages we need\n",
"import numpy as np\n",
"from matplotlib import animation, rc\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib.colorbar import ColorbarBase\n",
"from matplotlib.colors import Normalize\n",
"\n",
"from IPython.display import display, HTML\n",
"\n",
"from enum import Enum\n",
"import subprocess\n",
"import os\n",
"import gc\n",
"import datetime\n",
"import importlib\n",
"import logging\n",
"import netCDF4\n",
"\n",
"import pycuda.driver as cuda\n",
"import pycuda.compiler\n",
"\n",
"try:\n",
" from StringIO import StringIO\n",
"except ImportError:\n",
" from io import StringIO\n",
" \n",
"#Set large figure sizes as default\n",
"rc('figure', figsize=(16.0, 12.0))\n",
"rc('animation', html='html5')\n",
"rc('animation', bitrate=1800)\n",
"\n",
"from GPUSimulators import Common, IPythonMagic"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%setup_logging --out test_schemes.log"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%cuda_context_handler --no_autotuning my_context "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def genShockBubble(nx, ny, gamma):\n",
" width = 4.0\n",
" height = 1.0\n",
" dx = width / float(nx)\n",
" dy = height / float(ny)\n",
"\n",
" x_center = 0.5 #dx*nx/2.0\n",
" y_center = 0.5 #dy*ny/2.0\n",
"\n",
"\n",
" rho = np.ones((ny, nx), dtype=np.float32)\n",
" u = np.zeros((ny, nx), dtype=np.float32)\n",
" v = np.zeros((ny, nx), dtype=np.float32)\n",
" E = np.zeros((ny, nx), dtype=np.float32)\n",
" p = np.ones((ny, nx), dtype=np.float32)\n",
"\n",
" x = (dx*(np.arange(0, nx, dtype=np.float32)+0.5) - x_center)\n",
" y = (dy*(np.arange(0, ny, dtype=np.float32)+0.5) - y_center)\n",
" xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')\n",
" \n",
" #Bubble\n",
" radius = 0.25\n",
" bubble = np.sqrt(xv**2+yv**2) <= radius\n",
" rho = np.where(bubble, 0.1, rho)\n",
" \n",
" #Left boundary\n",
" left = (xv - xv.min() < 0.1)\n",
" rho = np.where(left, 3.81250, rho)\n",
" u = np.where(left, 2.57669, u)\n",
" \n",
" #Energy\n",
" p = np.where(left, 10.0, p)\n",
" E = 0.5*rho*(u**2+v**2) + p/(gamma-1.0)\n",
" \n",
" #Estimate dt\n",
" scale = 0.9\n",
" max_rho_estimate = 5.0\n",
" max_u_estimate = 5.0\n",
" dx = width/nx\n",
" dy = height/ny\n",
" dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(gamma*max_rho_estimate))\n",
"\n",
" return rho, rho*u, rho*v, E, dx, dy, dt\n",
"\n",
"nx = 400\n",
"rho0, rho_u0, rho_v0, E0, dx, dy, dt = genShockBubble(nx, nx//4, 1.4)\n",
"plt.figure()\n",
"plt.imshow(rho0)\n",
"plt.colorbar(orientation='vertical', aspect=20, pad=0.02, shrink=0.3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def genKelvinHelmholz(nx, ny, gamma, roughness=0.5):\n",
" \"\"\"\n",
" Roughness parameter in (0, 1.0] determines how squiggly the interface betweeen the zones is\n",
" \"\"\"\n",
" \n",
" def genZones(nx, ny, n):\n",
" \"\"\"\n",
" Generates the zones of the two fluids of K-H\n",
" \"\"\"\n",
" zone = np.zeros((ny, nx), dtype=np.int32)\n",
"\n",
" dx = 1.0 / nx\n",
" dy = 1.0 / ny\n",
"\n",
" def genSmoothRandom(nx, n):\n",
" assert (n <= nx), \"Number of generated points nx must be larger than n\"\n",
" \n",
" if n == nx:\n",
" return np.random.random(nx)-0.5\n",
" else:\n",
" from scipy.interpolate import interp1d\n",
"\n",
" #Control points and interpolator\n",
" xp = np.linspace(0.0, 1.0, n)\n",
" yp = np.random.random(n) - 0.5\n",
" f = interp1d(xp, yp, kind='cubic')\n",
"\n",
" #Interpolation points\n",
" x = np.linspace(0.0, 1.0, nx)\n",
" return f(x)\n",
"\n",
"\n",
"\n",
" x = np.linspace(0, 1, nx)\n",
" y = np.linspace(0, 1, ny)\n",
"\n",
" _, y = np.meshgrid(x, y)\n",
"\n",
" #print(y+a[0])\n",
"\n",
" a = genSmoothRandom(nx, n)*dy\n",
" zone = np.where(y > 0.25+a, zone, 1)\n",
"\n",
" a = genSmoothRandom(nx, n)*dy\n",
" zone = np.where(y < 0.75+a, zone, 1)\n",
" \n",
" return zone\n",
" \n",
" width = 2.0\n",
" height = 1.0\n",
" dx = width / float(nx)\n",
" dy = height / float(ny)\n",
"\n",
" rho = np.empty((ny, nx), dtype=np.float32)\n",
" u = np.empty((ny, nx), dtype=np.float32)\n",
" v = np.zeros((ny, nx), dtype=np.float32)\n",
" p = 2.5*np.ones((ny, nx), dtype=np.float32)\n",
"\n",
" #Generate the different zones \n",
" zones = genZones(nx, ny, max(1, min(nx, int(nx*roughness))))\n",
" \n",
" #Zone 0\n",
" zone0 = zones == 0\n",
" rho = np.where(zone0, 1.0, rho)\n",
" u = np.where(zone0, 0.5, u)\n",
" \n",
" #Zone 1\n",
" zone1 = zones == 1\n",
" rho = np.where(zone1, 2.0, rho)\n",
" u = np.where(zone1, -0.5, u)\n",
" \n",
" E = 0.5*rho*(u**2+v**2) + p/(gamma-1.0)\n",
" \n",
" #Estimate dt\n",
" scale = 0.9\n",
" max_rho_estimate = 3.0\n",
" max_u_estimate = 2.0\n",
" dx = width/nx\n",
" dy = height/ny\n",
" dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(gamma*max_rho_estimate))\n",
"\n",
" return rho, rho*u, rho*v, E, dx, dy, dt\n",
"\n",
"nx = 400\n",
"rho0, rho_u0, rho_v0, E0, dx, dy, dt = genKelvinHelmholz(nx, nx//4, 1.4, 0.25)\n",
"plt.figure()\n",
"plt.imshow(rho0)\n",
"plt.colorbar(orientation='vertical', aspect=20, pad=0.02, shrink=0.3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma):\n",
" #Construct simulator\n",
" arguments = {\n",
" 'context': my_context,\n",
" 'rho': rho0, 'rho_u': rho_u0, 'rho_v': rho_v0, 'E': E0,\n",
" 'nx': nx, 'ny': ny,\n",
" 'dx': dx, 'dy': dy, 'dt': 0.45*dt,\n",
" 'gamma': gamma\n",
" } \n",
" with Common.Timer(\"construct\") as t:\n",
" from GPUSimulators import EE2D_KP07_dimsplit\n",
" importlib.reload(EE2D_KP07_dimsplit)\n",
" sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**arguments)\n",
" print(\"Constructed in \" + str(t.secs) + \" seconds\")\n",
"\n",
" #Create netcdf file and simulate\n",
" with Common.DataDumper(outfile, mode='w', clobber=False) as outdata:\n",
" outdata.ncfile.createDimension('time', None)\n",
" outdata.ncfile.createDimension('x', nx)\n",
" outdata.ncfile.createDimension('y', ny)\n",
"\n",
" outdata.time_var = outdata.ncfile.createVariable('time', np.dtype('float32').char, 'time')\n",
" outdata.x_var = outdata.ncfile.createVariable('x', np.dtype('float32').char, 'x')\n",
" outdata.y_var = outdata.ncfile.createVariable('y', np.dtype('float32').char, 'y')\n",
" outdata.rho_var = outdata.ncfile.createVariable('rho', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
" outdata.rho_u_var = outdata.ncfile.createVariable('rho_u', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
" outdata.rho_v_var = outdata.ncfile.createVariable('rho_v', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
" outdata.E_var = outdata.ncfile.createVariable('E', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
"\n",
" outdata.x_var[:] = np.linspace(0, nx*dx, nx)\n",
" outdata.y_var[:] = np.linspace(0, ny*dy, ny)\n",
"\n",
" progress_printer = Common.ProgressPrinter(n_save, print_every=10)\n",
" for i in range(n_save+1):\n",
" #Sanity check simulator\n",
" sim.check()\n",
"\n",
" #Simulate\n",
" if (i > 0):\n",
" sim.simulate(t_end/n_save)\n",
"\n",
" #Save to file\n",
" #rho = sim.u0[0].download(sim.stream)\n",
" rho, rho_u, rho_v, E = sim.download()\n",
" outdata.time_var[i] = sim.simTime()\n",
" outdata.rho_var[i, :] = rho\n",
" outdata.rho_u_var[i, :] = rho_u\n",
" outdata.rho_v_var[i, :] = rho_v\n",
" outdata.E_var[i, :] = E\n",
"\n",
" #Write progress to screen\n",
" print_string = progress_printer.getPrintString(i)\n",
" if (print_string):\n",
" print(print_string)\n",
"\n",
" return outdata.filename "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def genSchlieren(rho):\n",
" #Compute length of z-component of normalized gradient vector \n",
" normal = np.gradient(rho) #[x, y, 1]\n",
" length = 1.0 / np.sqrt(normal[0]**2 + normal[1]**2 + 1.0**2)\n",
" schlieren = np.power(length, 128)\n",
" return schlieren\n",
"\n",
"\n",
"def genCurl(rho, rho_u, rho_v):\n",
" u = rho_u / rho\n",
" v = rho_v / rho\n",
" u = np.sqrt(u**2 + v**2)\n",
" u_max = u.max()\n",
" \n",
" du_dy, du_dx = np.gradient(u)\n",
" dv_dy, dv_dx = np.gradient(v)\n",
" \n",
" curl = dv_dx - du_dy\n",
" return curl\n",
"\n",
"\n",
"def genColors(rho, rho_u, rho_v, cmap, vmax, vmin):\n",
" schlieren = genSchlieren(rho)\n",
" curl = genCurl(rho, rho_u, rho_v)\n",
" \n",
" colors = Normalize(vmin, vmax, clip=True)(curl)\n",
" colors = cmap(colors)\n",
" for k in range(3):\n",
" colors[:,:,k] = colors[:,:,k]*schlieren\n",
" \n",
" return colors"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class VisType(Enum):\n",
" Schlieren = 0\n",
" Density = 1\n",
"\n",
"def makeAnimation(infile, vis_type, vmin, vmax, save_anim=False, cmap=plt.cm.coolwarm):\n",
" fig = plt.figure(figsize=(16, 8))#, tight_layout=True)\n",
" \n",
" with Common.DataDumper(infile, 'r') as indata:\n",
" time = indata.ncfile.variables['time'][:]\n",
" x = indata.ncfile.variables['x'][:]\n",
" y = indata.ncfile.variables['y'][:]\n",
" rho = indata.ncfile.variables['rho'][0]\n",
" rho_u = indata.ncfile.variables['rho_u'][0]\n",
" rho_v = indata.ncfile.variables['rho_v'][0]\n",
" num_frames = len(time)\n",
"\n",
" ax1 = fig.gca()\n",
" extents = [0, x.max(), 0, y.max()]\n",
" \n",
" if (vis_type == VisType.Schlieren):\n",
" im = ax1.imshow(genColors(rho, rho_u, rho_v, cmap, vmax, vmin), origin='bottom', extent=extents, cmap='gray', vmin=0.0, vmax=1.0)\n",
" fig.suptitle(\"Schlieren / vorticity at t={:.2f}\".format(time[0]))\n",
" elif (vis_type == VisType.Density):\n",
" im = ax1.imshow(rho, origin='bottom', extent=extents, cmap=cmap, vmin=vmin, vmax=vmax)\n",
" fig.suptitle(\"Density at t={:.2f}\".format(time[0]))\n",
" else:\n",
" assert False, \"Wrong vis_type\"\n",
"\n",
" #Create colorbar\n",
" from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
" divider = make_axes_locatable(ax1)\n",
" ax2 = divider.append_axes(\"right\", size=0.1, pad=0.05)\n",
" cb1 = ColorbarBase(ax2, cmap=cmap,\n",
" norm=Normalize(vmin=vmin, vmax=vmax),\n",
" orientation='vertical')\n",
" \n",
" #Label colorbar\n",
" if (vis_type == VisType.Schlieren):\n",
" cb1.set_label('Vorticity')\n",
" elif (vis_type == VisType.Density):\n",
" cb1.set_label('Density')\n",
"\n",
" progress_printer = Common.ProgressPrinter(num_frames, print_every=5)\n",
" \n",
" def animate(i):\n",
" rho = indata.ncfile.variables['rho'][i]\n",
" rho_u = indata.ncfile.variables['rho_u'][i]\n",
" rho_v = indata.ncfile.variables['rho_v'][i]\n",
" \n",
" if (vis_type == VisType.Schlieren):\n",
" im.set_data(genColors(rho, rho_u, rho_v, cmap, vmax, vmin))\n",
" fig.suptitle(\"Schlieren / vorticity at t={:.2f}\".format(time[i]))\n",
" elif (vis_type == VisType.Density):\n",
" im.set_data(rho) \n",
" fig.suptitle(\"Density at t={:.2f}\".format(time[i]))\n",
" \n",
" #Write progress to screen\n",
" print_string = progress_printer.getPrintString(i)\n",
" if (print_string):\n",
" print(print_string)\n",
"\n",
" anim = animation.FuncAnimation(fig, animate, interval=50, frames=range(num_frames))\n",
" plt.close()\n",
"\n",
" if (save_anim):\n",
" root, _ = os.path.splitext(infile)\n",
" movie_outpath = os.path.abspath(root + \".mp4\")\n",
" if (os.path.isfile(movie_outpath)):\n",
" print(\"Reusing previously created movie \" + movie_outpath)\n",
" else:\n",
" print(\"Creating movie \" + movie_outpath)\n",
" from matplotlib.animation import FFMpegFileWriter\n",
" writer = FFMpegFileWriter(fps=25)\n",
" anim.save(movie_outpath, writer=writer)\n",
" display(HTML(\"\"\"\n",
" <div align=\"middle\">\n",
" <video width=\"80%\" controls>\n",
" <source src=\"{:s}\" type=\"video/mp4\">\n",
" </video>\n",
" </div>\n",
" \"\"\".format(movie_outpath)))\n",
" else:\n",
" #plt.rcParams[\"animation.html\"] = \"html5\"\n",
" plt.rcParams[\"animation.html\"] = \"jshtml\"\n",
" display(anim)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nx = 1600\n",
"ny = nx//4\n",
"gamma = 1.4\n",
"t_end = 3.0\n",
"n_save = 200\n",
"outfile = \"data/euler_shock-bubble.nc\"\n",
"outdata = None\n",
"\n",
"rho0, rho_u0, rho_v0, E0, dx, dy, dt = genShockBubble(nx, ny, gamma)\n",
"outfile = makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#makeAnimation('data/euler_simdata_0000.nc')\n",
"makeAnimation(outfile, vis_type=VisType.Schlieren, vmin=-0.2, vmax=0.2, cmap=plt.cm.RdBu)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nx = 1600\n",
"ny = nx//2\n",
"gamma = 1.4\n",
"roughness = 0.125\n",
"t_end = 2.0#10.0\n",
"n_save = 100#1000\n",
"outfile = \"data/euler_kelvin_helmholz.nc\"\n",
"outdata = None\n",
"\n",
"rho0, rho_u0, rho_v0, E0, dx, dy, dt = genKelvinHelmholz(nx, ny, gamma, roughness)\n",
"outfile = makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"makeAnimation(outfile, vis_type=VisType.Density, vmin=1.0, vmax=2.0)\n",
"#makeAnimation('data/euler_kelvin_helmholz_0012.nc', cmap=plt.cm.coolwarm, save_anim=True, vmin=1.1, vmax=1.9)\n",
"#makeAnimation('data/euler_kelvin_helmholz_0011.nc', save_anim=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"makeAnimation(outfile, vis_type=VisType.Schlieren, vmin=-0.2, vmax=0.2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:anaconda3]",
"language": "python",
"name": "conda-env-anaconda3-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -29,16 +29,17 @@ import io
import hashlib
import logging
import gc
import netCDF4
import pycuda.compiler as cuda_compiler
import pycuda.gpuarray
import pycuda.driver as cuda
"""
Class which keeps track of time spent for a section of code
"""
class Timer(object):
"""
Class which keeps track of time spent for a section of code
"""
def __init__(self, tag, log_level=logging.DEBUG):
self.tag = tag
self.log_level = log_level
@ -63,9 +64,116 @@ class Timer(object):
class DataDumper(object):
"""
Simple class for holding a netCDF4 object
(handles opening and closing in a nice way)
Use as
with DataDumper("filename") as data:
...
"""
def __init__(self, filename, *args, **kwargs):
self.logger = logging.getLogger(__name__)
#Create directory if needed
dirname = os.path.dirname(filename)
if not os.path.isdir(dirname):
self.logger.info("Creating directory " + dirname)
os.makedirs(dirname)
#Get mode of file if we have that
mode = None
if (args):
mode = args[0]
elif (kwargs and 'mode' in kwargs.keys()):
mode = kwargs['mode']
#Create new unique file if writing
if (mode):
if (("w" in mode) or ("+" in mode) or ("a" in mode)):
i = 0
stem, ext = os.path.splitext(filename)
while (os.path.isfile(filename)):
filename = "{:s}_{:04d}{:s}".format(stem, i, ext)
i = i+1
self.filename = os.path.abspath(filename)
#Save arguments
self.args = args
self.kwargs = kwargs
#Log output
self.logger.info("Writing output to " + self.filename)
def __enter__(self):
self.logger.info("Opening " + self.filename)
if (self.args):
self.logger.info("Arguments: " + str(self.args))
if (self.kwargs):
self.logger.info("Keyword arguments: " + str(self.kwargs))
self.ncfile = netCDF4.Dataset(self.filename, *self.args, **self.kwargs)
return self
def __exit__(self, *args):
self.logger.info("Closing " + self.filename)
self.ncfile.close()
class ProgressPrinter(object):
"""
Small helper class for
"""
def __init__(self, total_steps, print_every=5):
self.logger = logging.getLogger(__name__)
self.start = time.time()
self.total_steps = total_steps
self.print_every = print_every
self.next_print_time = self.print_every
self.last_step = 0
self.secs_per_iter = None
def getPrintString(self, step):
elapsed = time.time() - self.start
if (elapsed > self.next_print_time):
dt = elapsed - (self.next_print_time - self.print_every)
dsteps = step - self.last_step
steps_remaining = self.total_steps - step
if (dsteps == 0):
return
self.last_step = step
self.next_print_time = elapsed + self.print_every
if not self.secs_per_iter:
self.secs_per_iter = dt / dsteps
self.secs_per_iter = 0.2*self.secs_per_iter + 0.8*(dt / dsteps)
remaining_time = steps_remaining * self.secs_per_iter
return "{:s}. Total: {:s}, elapsed: {:s}, remaining: {:s}".format(
ProgressPrinter.progressBar(step, self.total_steps),
ProgressPrinter.timeString(elapsed + remaining_time),
ProgressPrinter.timeString(elapsed),
ProgressPrinter.timeString(remaining_time))
def timeString(seconds):
seconds = int(max(seconds, 1))
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
periods = [('h', hours), ('m', minutes), ('s', seconds)]
time_string = ' '.join('{}{}'.format(value, name)
for name, value in periods
if value)
return time_string
def progressBar(step, total_steps, width=30):
progress = np.round(width * step / total_steps).astype(np.int32)
progressbar = "0% [" + "#"*(progress) + "="*(width-progress) + "] 100%"
return progressbar

View File

@ -113,6 +113,7 @@ class EE2D_KP07_dimsplit (Simulator.BaseSimulator):
self.u1[3].data.gpudata, self.u1[3].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@ -131,6 +132,7 @@ class EE2D_KP07_dimsplit (Simulator.BaseSimulator):
self.u1[3].data.gpudata, self.u1[3].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@ -105,6 +105,7 @@ class FORCE (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@ -100,6 +100,7 @@ class HLL (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@ -110,6 +110,7 @@ class HLL2 (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@ -126,6 +127,7 @@ class HLL2 (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@ -110,6 +110,7 @@ class KP07 (Simulator.BaseSimulator):
def stepEuler(self, dt):
self.substepRK(dt, 0)
self.t += dt
self.nt += 1
def stepRK(self, dt, order):
if (order != 2):
@ -117,6 +118,7 @@ class KP07 (Simulator.BaseSimulator):
self.substepRK(dt, 0)
self.substepRK(dt, 1)
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@ -110,6 +110,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@ -126,6 +127,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@ -101,6 +101,7 @@ class LxF (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@ -80,8 +80,9 @@ class BaseSimulator:
#Create a CUDA stream
self.stream = cuda.Stream()
#Keep track of simulation time
self.t = 0.0;
#Keep track of simulation time and number of timesteps
self.t = 0.0
self.nt = 0
#Log progress every n seconds during simulation
self.log_every = 5
@ -209,14 +210,16 @@ class BaseSimulator:
def download(self):
raise(NotImplementedError("Needs to be implemented in subclass"))
def check(self):
raise(NotImplementedError("Needs to be implemented in subclass"))
def sim_time(self):
return self.t
def synchronize(self):
self.stream.synchronize()
def check(self):
raise(NotImplementedError("Needs to be implemented in subclass"))
def simTime(self):
return self.t
def simSteps(self):
return self.nt

View File

@ -103,6 +103,7 @@ class WAF (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def stepDimsplitYX(self, dt):
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@ -118,6 +119,7 @@ class WAF (Simulator.BaseSimulator):
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
self.u0, self.u1 = self.u1, self.u0
self.t += dt
self.nt += 1
def download(self):
return self.u0.download(self.stream)

View File

@ -154,10 +154,10 @@ __global__ void KP07DimsplitKernel(
//Read into shared memory
readBlock<w, h, gc>( rho0_ptr_, rho0_pitch_, Q[0], nx_+4, ny_+4);
readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_+4, ny_+4);
readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_+4, ny_+4);
readBlock<w, h, gc>( E0_ptr_, E0_pitch_, Q[3], nx_+4, ny_+4);
readBlock<w, h, gc>( rho0_ptr_, rho0_pitch_, Q[0], nx_, ny_);
readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_);
readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_);
readBlock<w, h, gc>( E0_ptr_, E0_pitch_, Q[3], nx_, ny_);
__syncthreads();
//Fix boundary conditions
@ -226,6 +226,26 @@ __global__ void KP07DimsplitKernel(
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
__syncthreads();
//This is the RK2-part
const int tx = threadIdx.x + gc;
const int ty = threadIdx.y + gc;
const float q1 = Q[0][ty][tx];
const float q2 = Q[1][ty][tx];
const float q3 = Q[2][ty][tx];
const float q4 = Q[3][ty][tx];
__syncthreads();
readBlock<w, h, gc>( rho1_ptr_, rho1_pitch_, Q[0], nx_, ny_);
readBlock<w, h, gc>(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_);
readBlock<w, h, gc>(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_);
readBlock<w, h, gc>( E1_ptr_, E1_pitch_, Q[3], nx_, ny_);
__syncthreads();
Q[0][ty][tx] = 0.5f*( Q[0][ty][tx] + q1 );
Q[1][ty][tx] = 0.5f*( Q[1][ty][tx] + q2 );
Q[2][ty][tx] = 0.5f*( Q[2][ty][tx] + q3 );
Q[3][ty][tx] = 0.5f*( Q[3][ty][tx] + q4 );
}

View File

@ -92,19 +92,29 @@ __device__ float desingularize(float x_, float eps_) {
template<int block_width, int block_height, int ghost_cells>
inline __device__ void readBlock(float* ptr_, int pitch_,
float shmem[block_height+2*ghost_cells][block_width+2*ghost_cells],
const int max_x_, const int max_y_) {
const int nx_, const int ny_) {
//Index of block within domain
const int bx = blockDim.x * blockIdx.x;
const int by = blockDim.y * blockIdx.y;
const int gc_pad = 4;
//Read into shared memory
//Loop over all variables
for (int j=threadIdx.y; j<block_height+2*ghost_cells; j+=block_height) {
const int l = min(by + j, max_y_-1);
//const int l = min(by + j, ny_+2*ghost_cells-1);
const int y = by + j;
const int y_offset = ( (int) (y < gc_pad) - (int) (y >= ny_+2*ghost_cells-gc_pad) ) * (ny_+2*ghost_cells - 2*gc_pad);
const int l = y + y_offset;
float* row = (float*) ((char*) ptr_ + pitch_*l);
for (int i=threadIdx.x; i<block_width+2*ghost_cells; i+=block_width) {
const int k = min(bx + i, max_x_-1);
//const int k = min(bx + i, nx_+2*ghost_cells-1);
const int x = bx + i;
const int gc_pad = 4;
const int x_offset = ( (int) (x < gc_pad) - (int) (x >= nx_+2*ghost_cells-gc_pad) ) * (nx_+2*ghost_cells - 2*gc_pad);
const int k = x + x_offset;
shmem[j][i] = row[k];
}
@ -248,6 +258,16 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
template<int block_width, int block_height, int ghost_cells, int vars>