Euler appears to work now

2025-05-18 14:34:13 +02:00 · 2018-11-05 16:46:37 +01:00 · 2018-11-05 16:46:37 +01:00 · e38885d39b
commit e38885d39b
parent 0671bd747a
13 changed files with 702 additions and 19 deletions
--- a/EulerTesting.ipynb
+++ b/EulerTesting.ipynb
@ -0,0 +1,519 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Lets have matplotlib \"inline\"\n",
    "%matplotlib inline\n",
    "\n",
    "# Add line profiler\n",
    "%load_ext line_profiler\n",
    "\n",
    "#Import packages we need\n",
    "import numpy as np\n",
    "from matplotlib import animation, rc\n",
    "from matplotlib import pyplot as plt\n",
    "from matplotlib.colorbar import ColorbarBase\n",
    "from matplotlib.colors import Normalize\n",
    "\n",
    "from IPython.display import display, HTML\n",
    "\n",
    "from enum import Enum\n",
    "import subprocess\n",
    "import os\n",
    "import gc\n",
    "import datetime\n",
    "import importlib\n",
    "import logging\n",
    "import netCDF4\n",
    "\n",
    "import pycuda.driver as cuda\n",
    "import pycuda.compiler\n",
    "\n",
    "try:\n",
    "    from StringIO import StringIO\n",
    "except ImportError:\n",
    "    from io import StringIO\n",
    "    \n",
    "#Set large figure sizes as default\n",
    "rc('figure', figsize=(16.0, 12.0))\n",
    "rc('animation', html='html5')\n",
    "rc('animation', bitrate=1800)\n",
    "\n",
    "from GPUSimulators import Common, IPythonMagic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%setup_logging --out test_schemes.log"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%cuda_context_handler --no_autotuning my_context "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def genShockBubble(nx, ny, gamma):\n",
    "    width = 4.0\n",
    "    height = 1.0\n",
    "    dx = width / float(nx)\n",
    "    dy = height / float(ny)\n",
    "\n",
    "    x_center = 0.5 #dx*nx/2.0\n",
    "    y_center = 0.5 #dy*ny/2.0\n",
    "\n",
    "\n",
    "    rho = np.ones((ny, nx), dtype=np.float32)\n",
    "    u = np.zeros((ny, nx), dtype=np.float32)\n",
    "    v = np.zeros((ny, nx), dtype=np.float32)\n",
    "    E = np.zeros((ny, nx), dtype=np.float32)\n",
    "    p = np.ones((ny, nx), dtype=np.float32)\n",
    "\n",
    "    x = (dx*(np.arange(0, nx, dtype=np.float32)+0.5) - x_center)\n",
    "    y = (dy*(np.arange(0, ny, dtype=np.float32)+0.5) - y_center)\n",
    "    xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')\n",
    "       \n",
    "    #Bubble\n",
    "    radius = 0.25\n",
    "    bubble = np.sqrt(xv**2+yv**2) <= radius\n",
    "    rho = np.where(bubble, 0.1, rho)\n",
    "    \n",
    "    #Left boundary\n",
    "    left = (xv - xv.min() < 0.1)\n",
    "    rho = np.where(left, 3.81250, rho)\n",
    "    u = np.where(left, 2.57669, u)\n",
    "    \n",
    "    #Energy\n",
    "    p = np.where(left, 10.0, p)\n",
    "    E = 0.5*rho*(u**2+v**2) + p/(gamma-1.0)\n",
    "    \n",
    "    #Estimate dt\n",
    "    scale = 0.9\n",
    "    max_rho_estimate = 5.0\n",
    "    max_u_estimate = 5.0\n",
    "    dx = width/nx\n",
    "    dy = height/ny\n",
    "    dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(gamma*max_rho_estimate))\n",
    "\n",
    "    return rho, rho*u, rho*v, E, dx, dy, dt\n",
    "\n",
    "nx = 400\n",
    "rho0, rho_u0, rho_v0, E0, dx, dy, dt = genShockBubble(nx, nx//4, 1.4)\n",
    "plt.figure()\n",
    "plt.imshow(rho0)\n",
    "plt.colorbar(orientation='vertical', aspect=20, pad=0.02, shrink=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def genKelvinHelmholz(nx, ny, gamma, roughness=0.5):\n",
    "    \"\"\"\n",
    "    Roughness parameter in (0, 1.0] determines how squiggly the interface betweeen the zones is\n",
    "    \"\"\"\n",
    "    \n",
    "    def genZones(nx, ny, n):\n",
    "        \"\"\"\n",
    "        Generates the zones of the two fluids of K-H\n",
    "        \"\"\"\n",
    "        zone = np.zeros((ny, nx), dtype=np.int32)\n",
    "\n",
    "        dx = 1.0 / nx\n",
    "        dy = 1.0 / ny\n",
    "\n",
    "        def genSmoothRandom(nx, n):\n",
    "            assert (n <= nx), \"Number of generated points nx must be larger than n\"\n",
    "            \n",
    "            if n == nx:\n",
    "                return np.random.random(nx)-0.5\n",
    "            else:\n",
    "                from scipy.interpolate import interp1d\n",
    "\n",
    "                #Control points and interpolator\n",
    "                xp = np.linspace(0.0, 1.0, n)\n",
    "                yp = np.random.random(n) - 0.5\n",
    "                f = interp1d(xp, yp, kind='cubic')\n",
    "\n",
    "                #Interpolation points\n",
    "                x = np.linspace(0.0, 1.0, nx)\n",
    "                return f(x)\n",
    "\n",
    "\n",
    "\n",
    "        x = np.linspace(0, 1, nx)\n",
    "        y = np.linspace(0, 1, ny)\n",
    "\n",
    "        _, y = np.meshgrid(x, y)\n",
    "\n",
    "        #print(y+a[0])\n",
    "\n",
    "        a = genSmoothRandom(nx, n)*dy\n",
    "        zone = np.where(y > 0.25+a, zone, 1)\n",
    "\n",
    "        a = genSmoothRandom(nx, n)*dy\n",
    "        zone = np.where(y < 0.75+a, zone, 1)\n",
    "        \n",
    "        return zone\n",
    "        \n",
    "    width = 2.0\n",
    "    height = 1.0\n",
    "    dx = width / float(nx)\n",
    "    dy = height / float(ny)\n",
    "\n",
    "    rho = np.empty((ny, nx), dtype=np.float32)\n",
    "    u = np.empty((ny, nx), dtype=np.float32)\n",
    "    v = np.zeros((ny, nx), dtype=np.float32)\n",
    "    p = 2.5*np.ones((ny, nx), dtype=np.float32)\n",
    "\n",
    "    #Generate the different zones    \n",
    "    zones = genZones(nx, ny, max(1, min(nx, int(nx*roughness))))\n",
    "    \n",
    "    #Zone 0\n",
    "    zone0 = zones == 0\n",
    "    rho = np.where(zone0, 1.0, rho)\n",
    "    u = np.where(zone0, 0.5, u)\n",
    "    \n",
    "    #Zone 1\n",
    "    zone1 = zones == 1\n",
    "    rho = np.where(zone1, 2.0, rho)\n",
    "    u = np.where(zone1, -0.5, u)\n",
    "    \n",
    "    E = 0.5*rho*(u**2+v**2) + p/(gamma-1.0)\n",
    "    \n",
    "    #Estimate dt\n",
    "    scale = 0.9\n",
    "    max_rho_estimate = 3.0\n",
    "    max_u_estimate = 2.0\n",
    "    dx = width/nx\n",
    "    dy = height/ny\n",
    "    dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(gamma*max_rho_estimate))\n",
    "\n",
    "    return rho, rho*u, rho*v, E, dx, dy, dt\n",
    "\n",
    "nx = 400\n",
    "rho0, rho_u0, rho_v0, E0, dx, dy, dt = genKelvinHelmholz(nx, nx//4, 1.4, 0.25)\n",
    "plt.figure()\n",
    "plt.imshow(rho0)\n",
    "plt.colorbar(orientation='vertical', aspect=20, pad=0.02, shrink=0.3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma):\n",
    "    #Construct simulator\n",
    "    arguments = {\n",
    "        'context': my_context,\n",
    "        'rho': rho0, 'rho_u': rho_u0, 'rho_v': rho_v0, 'E': E0,\n",
    "        'nx': nx, 'ny': ny,\n",
    "        'dx': dx, 'dy': dy, 'dt': 0.45*dt,\n",
    "        'gamma': gamma\n",
    "    } \n",
    "    with Common.Timer(\"construct\") as t:\n",
    "        from GPUSimulators import EE2D_KP07_dimsplit\n",
    "        importlib.reload(EE2D_KP07_dimsplit)\n",
    "        sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**arguments)\n",
    "    print(\"Constructed in \" + str(t.secs) + \" seconds\")\n",
    "\n",
    "    #Create netcdf file and simulate\n",
    "    with Common.DataDumper(outfile, mode='w', clobber=False) as outdata:\n",
    "        outdata.ncfile.createDimension('time', None)\n",
    "        outdata.ncfile.createDimension('x', nx)\n",
    "        outdata.ncfile.createDimension('y', ny)\n",
    "\n",
    "        outdata.time_var  = outdata.ncfile.createVariable('time', np.dtype('float32').char, 'time')\n",
    "        outdata.x_var     = outdata.ncfile.createVariable('x', np.dtype('float32').char, 'x')\n",
    "        outdata.y_var     = outdata.ncfile.createVariable('y', np.dtype('float32').char, 'y')\n",
    "        outdata.rho_var   = outdata.ncfile.createVariable('rho', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
    "        outdata.rho_u_var = outdata.ncfile.createVariable('rho_u', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
    "        outdata.rho_v_var = outdata.ncfile.createVariable('rho_v', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
    "        outdata.E_var     = outdata.ncfile.createVariable('E', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
    "\n",
    "        outdata.x_var[:] = np.linspace(0, nx*dx, nx)\n",
    "        outdata.y_var[:] = np.linspace(0, ny*dy, ny)\n",
    "\n",
    "        progress_printer = Common.ProgressPrinter(n_save, print_every=10)\n",
    "        for i in range(n_save+1):\n",
    "            #Sanity check simulator\n",
    "            sim.check()\n",
    "\n",
    "            #Simulate\n",
    "            if (i > 0):\n",
    "                sim.simulate(t_end/n_save)\n",
    "\n",
    "            #Save to file\n",
    "            #rho = sim.u0[0].download(sim.stream)\n",
    "            rho, rho_u, rho_v, E = sim.download()\n",
    "            outdata.time_var[i] = sim.simTime()\n",
    "            outdata.rho_var[i, :] = rho\n",
    "            outdata.rho_u_var[i, :] = rho_u\n",
    "            outdata.rho_v_var[i, :] = rho_v\n",
    "            outdata.E_var[i, :] = E\n",
    "\n",
    "            #Write progress to screen\n",
    "            print_string = progress_printer.getPrintString(i)\n",
    "            if (print_string):\n",
    "                print(print_string)\n",
    "\n",
    "    return outdata.filename   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def genSchlieren(rho):\n",
    "    #Compute length of z-component of normalized gradient vector \n",
    "    normal = np.gradient(rho) #[x, y, 1]\n",
    "    length = 1.0 / np.sqrt(normal[0]**2 + normal[1]**2 + 1.0**2)\n",
    "    schlieren = np.power(length, 128)\n",
    "    return schlieren\n",
    "\n",
    "\n",
    "def genCurl(rho, rho_u, rho_v):\n",
    "    u = rho_u / rho\n",
    "    v = rho_v / rho\n",
    "    u = np.sqrt(u**2 + v**2)\n",
    "    u_max = u.max()\n",
    "    \n",
    "    du_dy, du_dx = np.gradient(u)\n",
    "    dv_dy, dv_dx = np.gradient(v)\n",
    "    \n",
    "    curl = dv_dx - du_dy\n",
    "    return curl\n",
    "\n",
    "\n",
    "def genColors(rho, rho_u, rho_v, cmap, vmax, vmin):\n",
    "    schlieren = genSchlieren(rho)\n",
    "    curl = genCurl(rho, rho_u, rho_v)\n",
    "    \n",
    "    colors = Normalize(vmin, vmax, clip=True)(curl)\n",
    "    colors = cmap(colors)\n",
    "    for k in range(3):\n",
    "        colors[:,:,k] = colors[:,:,k]*schlieren\n",
    "        \n",
    "    return colors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class VisType(Enum):\n",
    "    Schlieren = 0\n",
    "    Density = 1\n",
    "\n",
    "def makeAnimation(infile, vis_type, vmin, vmax, save_anim=False, cmap=plt.cm.coolwarm):\n",
    "    fig = plt.figure(figsize=(16, 8))#, tight_layout=True)\n",
    "    \n",
    "    with Common.DataDumper(infile, 'r') as indata:\n",
    "        time = indata.ncfile.variables['time'][:]\n",
    "        x = indata.ncfile.variables['x'][:]\n",
    "        y = indata.ncfile.variables['y'][:]\n",
    "        rho = indata.ncfile.variables['rho'][0]\n",
    "        rho_u = indata.ncfile.variables['rho_u'][0]\n",
    "        rho_v = indata.ncfile.variables['rho_v'][0]\n",
    "        num_frames = len(time)\n",
    "\n",
    "        ax1 = fig.gca()\n",
    "        extents = [0, x.max(), 0, y.max()]\n",
    "        \n",
    "        if (vis_type == VisType.Schlieren):\n",
    "            im = ax1.imshow(genColors(rho, rho_u, rho_v, cmap, vmax, vmin), origin='bottom', extent=extents, cmap='gray', vmin=0.0, vmax=1.0)\n",
    "            fig.suptitle(\"Schlieren / vorticity at t={:.2f}\".format(time[0]))\n",
    "        elif (vis_type == VisType.Density):\n",
    "            im = ax1.imshow(rho, origin='bottom', extent=extents, cmap=cmap, vmin=vmin, vmax=vmax)\n",
    "            fig.suptitle(\"Density at t={:.2f}\".format(time[0]))\n",
    "        else:\n",
    "            assert False, \"Wrong vis_type\"\n",
    "\n",
    "        #Create colorbar\n",
    "        from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
    "        divider = make_axes_locatable(ax1)\n",
    "        ax2 = divider.append_axes(\"right\", size=0.1, pad=0.05)\n",
    "        cb1 = ColorbarBase(ax2, cmap=cmap,\n",
    "                            norm=Normalize(vmin=vmin, vmax=vmax),\n",
    "                            orientation='vertical')\n",
    "        \n",
    "        #Label colorbar\n",
    "        if (vis_type == VisType.Schlieren):\n",
    "            cb1.set_label('Vorticity')\n",
    "        elif (vis_type == VisType.Density):\n",
    "            cb1.set_label('Density')\n",
    "\n",
    "        progress_printer = Common.ProgressPrinter(num_frames, print_every=5)\n",
    "        \n",
    "        def animate(i):\n",
    "            rho = indata.ncfile.variables['rho'][i]\n",
    "            rho_u = indata.ncfile.variables['rho_u'][i]\n",
    "            rho_v = indata.ncfile.variables['rho_v'][i]\n",
    "            \n",
    "            if (vis_type == VisType.Schlieren):\n",
    "                im.set_data(genColors(rho, rho_u, rho_v, cmap, vmax, vmin))\n",
    "                fig.suptitle(\"Schlieren / vorticity at t={:.2f}\".format(time[i]))\n",
    "            elif (vis_type == VisType.Density):\n",
    "                im.set_data(rho) \n",
    "                fig.suptitle(\"Density at t={:.2f}\".format(time[i]))\n",
    "            \n",
    "            #Write progress to screen\n",
    "            print_string = progress_printer.getPrintString(i)\n",
    "            if (print_string):\n",
    "                print(print_string)\n",
    "\n",
    "        anim = animation.FuncAnimation(fig, animate, interval=50, frames=range(num_frames))\n",
    "        plt.close()\n",
    "\n",
    "        if (save_anim):\n",
    "            root, _ = os.path.splitext(infile)\n",
    "            movie_outpath = os.path.abspath(root + \".mp4\")\n",
    "            if (os.path.isfile(movie_outpath)):\n",
    "                print(\"Reusing previously created movie \" + movie_outpath)\n",
    "            else:\n",
    "                print(\"Creating movie \" + movie_outpath)\n",
    "                from matplotlib.animation import FFMpegFileWriter\n",
    "                writer = FFMpegFileWriter(fps=25)\n",
    "                anim.save(movie_outpath, writer=writer)\n",
    "            display(HTML(\"\"\"\n",
    "            <div align=\"middle\">\n",
    "            <video width=\"80%\" controls>\n",
    "                <source src=\"{:s}\" type=\"video/mp4\">\n",
    "            </video>\n",
    "            </div>\n",
    "            \"\"\".format(movie_outpath)))\n",
    "        else:\n",
    "            #plt.rcParams[\"animation.html\"] = \"html5\"\n",
    "            plt.rcParams[\"animation.html\"] = \"jshtml\"\n",
    "            display(anim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nx = 1600\n",
    "ny = nx//4\n",
    "gamma = 1.4\n",
    "t_end = 3.0\n",
    "n_save = 200\n",
    "outfile = \"data/euler_shock-bubble.nc\"\n",
    "outdata = None\n",
    "\n",
    "rho0, rho_u0, rho_v0, E0, dx, dy, dt = genShockBubble(nx, ny, gamma)\n",
    "outfile = makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#makeAnimation('data/euler_simdata_0000.nc')\n",
    "makeAnimation(outfile, vis_type=VisType.Schlieren, vmin=-0.2, vmax=0.2, cmap=plt.cm.RdBu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nx = 1600\n",
    "ny = nx//2\n",
    "gamma = 1.4\n",
    "roughness = 0.125\n",
    "t_end = 2.0#10.0\n",
    "n_save = 100#1000\n",
    "outfile = \"data/euler_kelvin_helmholz.nc\"\n",
    "outdata = None\n",
    "\n",
    "rho0, rho_u0, rho_v0, E0, dx, dy, dt = genKelvinHelmholz(nx, ny, gamma, roughness)\n",
    "outfile = makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "makeAnimation(outfile, vis_type=VisType.Density, vmin=1.0, vmax=2.0)\n",
    "#makeAnimation('data/euler_kelvin_helmholz_0012.nc', cmap=plt.cm.coolwarm, save_anim=True, vmin=1.1, vmax=1.9)\n",
    "#makeAnimation('data/euler_kelvin_helmholz_0011.nc', save_anim=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "makeAnimation(outfile, vis_type=VisType.Schlieren, vmin=-0.2, vmax=0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:anaconda3]",
   "language": "python",
   "name": "conda-env-anaconda3-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
--- a/GPUSimulators/Common.py
+++ b/GPUSimulators/Common.py
@ -29,16 +29,17 @@ import io
 import hashlib
 import logging
 import gc
 import netCDF4
 import pycuda.compiler as cuda_compiler
 import pycuda.gpuarray
 import pycuda.driver as cuda
 """
 Class which keeps track of time spent for a section of code
 """
 class Timer(object):
    """
    Class which keeps track of time spent for a section of code
    """
    def __init__(self, tag, log_level=logging.DEBUG):
        self.tag = tag
        self.log_level = log_level
@ -63,9 +64,116 @@ class Timer(object):
 class DataDumper(object):
    """
    Simple class for holding a netCDF4 object
    (handles opening and closing in a nice way)
    Use as 
    with DataDumper("filename") as data:
        ...
    """
    def __init__(self, filename, *args, **kwargs):
        self.logger = logging.getLogger(__name__)
        #Create directory if needed
        dirname = os.path.dirname(filename)
        if not os.path.isdir(dirname):
            self.logger.info("Creating directory " + dirname)
            os.makedirs(dirname)
        #Get mode of file if we have that
        mode = None
        if (args):
            mode = args[0]
        elif (kwargs and 'mode' in kwargs.keys()):
            mode = kwargs['mode']
        #Create new unique file if writing
        if (mode):
            if (("w" in mode) or ("+" in mode) or ("a" in mode)):
                i = 0
                stem, ext = os.path.splitext(filename)
                while (os.path.isfile(filename)):
                    filename = "{:s}_{:04d}{:s}".format(stem, i, ext)
                    i = i+1
        self.filename = os.path.abspath(filename)
        #Save arguments
        self.args = args
        self.kwargs = kwargs
        #Log output
        self.logger.info("Writing output to " + self.filename)
    def __enter__(self):
        self.logger.info("Opening " + self.filename)
        if (self.args):
            self.logger.info("Arguments: " + str(self.args))
        if (self.kwargs):
            self.logger.info("Keyword arguments: " + str(self.kwargs))
        self.ncfile = netCDF4.Dataset(self.filename, *self.args, **self.kwargs)
        return self
    def __exit__(self, *args):
        self.logger.info("Closing " + self.filename)
        self.ncfile.close()
 class ProgressPrinter(object):
    """
    Small helper class for 
    """
    def __init__(self, total_steps, print_every=5):
        self.logger = logging.getLogger(__name__)
        self.start = time.time()
        self.total_steps = total_steps
        self.print_every = print_every
        self.next_print_time = self.print_every
        self.last_step = 0
        self.secs_per_iter = None
    def getPrintString(self, step):
        elapsed =  time.time() - self.start
        if (elapsed > self.next_print_time):            
            dt = elapsed - (self.next_print_time - self.print_every)
            dsteps = step - self.last_step
            steps_remaining = self.total_steps - step
            if (dsteps == 0):
                return
            self.last_step = step
            self.next_print_time = elapsed + self.print_every
            if not self.secs_per_iter:
                self.secs_per_iter = dt / dsteps
            self.secs_per_iter = 0.2*self.secs_per_iter + 0.8*(dt / dsteps)
            remaining_time = steps_remaining * self.secs_per_iter
            return "{:s}. Total: {:s}, elapsed: {:s}, remaining: {:s}".format(
                ProgressPrinter.progressBar(step, self.total_steps), 
                ProgressPrinter.timeString(elapsed + remaining_time), 
                ProgressPrinter.timeString(elapsed), 
                ProgressPrinter.timeString(remaining_time))
    def timeString(seconds):
        seconds = int(max(seconds, 1))
        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        periods = [('h', hours), ('m', minutes), ('s', seconds)]
        time_string = ' '.join('{}{}'.format(value, name)
                                for name, value in periods
                                if value)
        return time_string
    def progressBar(step, total_steps, width=30):
        progress = np.round(width * step / total_steps).astype(np.int32)
        progressbar = "0% [" + "#"*(progress) + "="*(width-progress) + "] 100%"
        return progressbar
--- a/GPUSimulators/EE2D_KP07_dimsplit.py
+++ b/GPUSimulators/EE2D_KP07_dimsplit.py
@ -113,6 +113,7 @@ class EE2D_KP07_dimsplit (Simulator.BaseSimulator):
                self.u1[3].data.gpudata, self.u1[3].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def stepDimsplitYX(self, dt):
        self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@ -131,6 +132,7 @@ class EE2D_KP07_dimsplit (Simulator.BaseSimulator):
                self.u1[3].data.gpudata, self.u1[3].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def download(self):
        return self.u0.download(self.stream)
--- a/GPUSimulators/FORCE.py
+++ b/GPUSimulators/FORCE.py
@ -105,6 +105,7 @@ class FORCE (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def download(self):
        return self.u0.download(self.stream)
--- a/GPUSimulators/HLL.py
+++ b/GPUSimulators/HLL.py
@ -100,6 +100,7 @@ class HLL (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def download(self):
        return self.u0.download(self.stream)
--- a/GPUSimulators/HLL2.py
+++ b/GPUSimulators/HLL2.py
@ -110,6 +110,7 @@ class HLL2 (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def stepDimsplitYX(self, dt):
        self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@ -126,6 +127,7 @@ class HLL2 (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def download(self):
        return self.u0.download(self.stream)
--- a/GPUSimulators/KP07.py
+++ b/GPUSimulators/KP07.py
@ -110,6 +110,7 @@ class KP07 (Simulator.BaseSimulator):
    def stepEuler(self, dt):
        self.substepRK(dt, 0)
        self.t += dt
        self.nt += 1
    def stepRK(self, dt, order):
        if (order != 2):
@ -117,6 +118,7 @@ class KP07 (Simulator.BaseSimulator):
        self.substepRK(dt, 0)
        self.substepRK(dt, 1)
        self.t += dt
        self.nt += 1
    def download(self):
        return self.u0.download(self.stream)
--- a/GPUSimulators/KP07_dimsplit.py
+++ b/GPUSimulators/KP07_dimsplit.py
@ -110,6 +110,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def stepDimsplitYX(self, dt):
        self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@ -126,6 +127,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def download(self):
        return self.u0.download(self.stream)
--- a/GPUSimulators/LxF.py
+++ b/GPUSimulators/LxF.py
@ -101,6 +101,7 @@ class LxF (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def download(self):
        return self.u0.download(self.stream)
--- a/GPUSimulators/Simulator.py
+++ b/GPUSimulators/Simulator.py
@ -80,8 +80,9 @@ class BaseSimulator:
        #Create a CUDA stream
        self.stream = cuda.Stream()
-        #Keep track of simulation time
+        #Keep track of simulation time and number of timesteps
-        self.t = 0.0;
+        self.t = 0.0
        self.nt = 0
        #Log progress every n seconds during simulation
        self.log_every = 5
@ -209,14 +210,16 @@ class BaseSimulator:
    def download(self):
        raise(NotImplementedError("Needs to be implemented in subclass"))
    def check(self):
        raise(NotImplementedError("Needs to be implemented in subclass"))
    def sim_time(self):
        return self.t
    def synchronize(self):
        self.stream.synchronize()
    def check(self):
        raise(NotImplementedError("Needs to be implemented in subclass"))
    def simTime(self):
        return self.t
    def simSteps(self):
        return self.nt
--- a/GPUSimulators/WAF.py
+++ b/GPUSimulators/WAF.py
@ -103,6 +103,7 @@ class WAF (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def stepDimsplitYX(self, dt):
        self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
@ -118,6 +119,7 @@ class WAF (Simulator.BaseSimulator):
                self.u1[2].data.gpudata, self.u1[2].data.strides[0])
        self.u0, self.u1 = self.u1, self.u0
        self.t += dt
        self.nt += 1
    def download(self):
        return self.u0.download(self.stream)
--- a/GPUSimulators/cuda/EE2D_KP07_dimsplit.cu
+++ b/GPUSimulators/cuda/EE2D_KP07_dimsplit.cu
@ -154,10 +154,10 @@ __global__ void KP07DimsplitKernel(
    //Read into shared memory
-    readBlock<w, h, gc>(  rho0_ptr_,   rho0_pitch_, Q[0], nx_+4, ny_+4);
+    readBlock<w, h, gc>(  rho0_ptr_,   rho0_pitch_, Q[0], nx_, ny_);
-    readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_+4, ny_+4);
+    readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_);
-    readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_+4, ny_+4);
+    readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_);
-    readBlock<w, h, gc>(    E0_ptr_,     E0_pitch_, Q[3], nx_+4, ny_+4);
+    readBlock<w, h, gc>(    E0_ptr_,     E0_pitch_, Q[3], nx_, ny_);
    __syncthreads();
    //Fix boundary conditions
@ -226,6 +226,26 @@ __global__ void KP07DimsplitKernel(
        evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
        __syncthreads();
        //This is the RK2-part
        const int tx = threadIdx.x + gc;
        const int ty = threadIdx.y + gc;
        const float q1 = Q[0][ty][tx];
        const float q2 = Q[1][ty][tx];
        const float q3 = Q[2][ty][tx];
        const float q4 = Q[3][ty][tx];
        __syncthreads();
        readBlock<w, h, gc>(  rho1_ptr_,   rho1_pitch_, Q[0], nx_, ny_);
        readBlock<w, h, gc>(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_);
        readBlock<w, h, gc>(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_);
        readBlock<w, h, gc>(    E1_ptr_,     E1_pitch_, Q[3], nx_, ny_);
        __syncthreads();
        Q[0][ty][tx] = 0.5f*( Q[0][ty][tx] + q1 );
        Q[1][ty][tx] = 0.5f*( Q[1][ty][tx] + q2 );
        Q[2][ty][tx] = 0.5f*( Q[2][ty][tx] + q3 );
        Q[3][ty][tx] = 0.5f*( Q[3][ty][tx] + q4 );
    }
--- a/GPUSimulators/cuda/common.h
+++ b/GPUSimulators/cuda/common.h
@ -92,19 +92,29 @@ __device__ float desingularize(float x_, float eps_) {
 template<int block_width, int block_height, int ghost_cells>
 inline __device__ void readBlock(float* ptr_, int pitch_,
                float shmem[block_height+2*ghost_cells][block_width+2*ghost_cells], 
-                const int max_x_, const int max_y_) {
+                const int nx_, const int ny_) {
    //Index of block within domain
    const int bx = blockDim.x * blockIdx.x;
    const int by = blockDim.y * blockIdx.y;
-        
+
    const int gc_pad = 4;
    //Read into shared memory
    //Loop over all variables
    for (int j=threadIdx.y; j<block_height+2*ghost_cells; j+=block_height) {
-        const int l = min(by + j, max_y_-1);
+        //const int l = min(by + j, ny_+2*ghost_cells-1);
        const int y = by + j;
        const int y_offset = ( (int) (y < gc_pad) - (int) (y >= ny_+2*ghost_cells-gc_pad) ) * (ny_+2*ghost_cells - 2*gc_pad); 
        const int l = y + y_offset;
        float* row = (float*) ((char*) ptr_  + pitch_*l);
        for (int i=threadIdx.x; i<block_width+2*ghost_cells; i+=block_width) {
-            const int k = min(bx + i, max_x_-1);
+            //const int k = min(bx + i, nx_+2*ghost_cells-1);
            const int x = bx + i;
            const int gc_pad = 4;
            const int x_offset = ( (int) (x < gc_pad) - (int) (x >= nx_+2*ghost_cells-gc_pad) ) * (nx_+2*ghost_cells - 2*gc_pad); 
            const int k = x + x_offset;
            shmem[j][i] = row[k];
        }
@ -248,6 +258,16 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
 template<int block_width, int block_height, int ghost_cells, int vars>