mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
Euler appears to work now
This commit is contained in:
parent
0671bd747a
commit
e38885d39b
519
EulerTesting.ipynb
Normal file
519
EulerTesting.ipynb
Normal file
@ -0,0 +1,519 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#Lets have matplotlib \"inline\"\n",
|
||||||
|
"%matplotlib inline\n",
|
||||||
|
"\n",
|
||||||
|
"# Add line profiler\n",
|
||||||
|
"%load_ext line_profiler\n",
|
||||||
|
"\n",
|
||||||
|
"#Import packages we need\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"from matplotlib import animation, rc\n",
|
||||||
|
"from matplotlib import pyplot as plt\n",
|
||||||
|
"from matplotlib.colorbar import ColorbarBase\n",
|
||||||
|
"from matplotlib.colors import Normalize\n",
|
||||||
|
"\n",
|
||||||
|
"from IPython.display import display, HTML\n",
|
||||||
|
"\n",
|
||||||
|
"from enum import Enum\n",
|
||||||
|
"import subprocess\n",
|
||||||
|
"import os\n",
|
||||||
|
"import gc\n",
|
||||||
|
"import datetime\n",
|
||||||
|
"import importlib\n",
|
||||||
|
"import logging\n",
|
||||||
|
"import netCDF4\n",
|
||||||
|
"\n",
|
||||||
|
"import pycuda.driver as cuda\n",
|
||||||
|
"import pycuda.compiler\n",
|
||||||
|
"\n",
|
||||||
|
"try:\n",
|
||||||
|
" from StringIO import StringIO\n",
|
||||||
|
"except ImportError:\n",
|
||||||
|
" from io import StringIO\n",
|
||||||
|
" \n",
|
||||||
|
"#Set large figure sizes as default\n",
|
||||||
|
"rc('figure', figsize=(16.0, 12.0))\n",
|
||||||
|
"rc('animation', html='html5')\n",
|
||||||
|
"rc('animation', bitrate=1800)\n",
|
||||||
|
"\n",
|
||||||
|
"from GPUSimulators import Common, IPythonMagic"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%setup_logging --out test_schemes.log"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"%cuda_context_handler --no_autotuning my_context "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def genShockBubble(nx, ny, gamma):\n",
|
||||||
|
" width = 4.0\n",
|
||||||
|
" height = 1.0\n",
|
||||||
|
" dx = width / float(nx)\n",
|
||||||
|
" dy = height / float(ny)\n",
|
||||||
|
"\n",
|
||||||
|
" x_center = 0.5 #dx*nx/2.0\n",
|
||||||
|
" y_center = 0.5 #dy*ny/2.0\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" rho = np.ones((ny, nx), dtype=np.float32)\n",
|
||||||
|
" u = np.zeros((ny, nx), dtype=np.float32)\n",
|
||||||
|
" v = np.zeros((ny, nx), dtype=np.float32)\n",
|
||||||
|
" E = np.zeros((ny, nx), dtype=np.float32)\n",
|
||||||
|
" p = np.ones((ny, nx), dtype=np.float32)\n",
|
||||||
|
"\n",
|
||||||
|
" x = (dx*(np.arange(0, nx, dtype=np.float32)+0.5) - x_center)\n",
|
||||||
|
" y = (dy*(np.arange(0, ny, dtype=np.float32)+0.5) - y_center)\n",
|
||||||
|
" xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')\n",
|
||||||
|
" \n",
|
||||||
|
" #Bubble\n",
|
||||||
|
" radius = 0.25\n",
|
||||||
|
" bubble = np.sqrt(xv**2+yv**2) <= radius\n",
|
||||||
|
" rho = np.where(bubble, 0.1, rho)\n",
|
||||||
|
" \n",
|
||||||
|
" #Left boundary\n",
|
||||||
|
" left = (xv - xv.min() < 0.1)\n",
|
||||||
|
" rho = np.where(left, 3.81250, rho)\n",
|
||||||
|
" u = np.where(left, 2.57669, u)\n",
|
||||||
|
" \n",
|
||||||
|
" #Energy\n",
|
||||||
|
" p = np.where(left, 10.0, p)\n",
|
||||||
|
" E = 0.5*rho*(u**2+v**2) + p/(gamma-1.0)\n",
|
||||||
|
" \n",
|
||||||
|
" #Estimate dt\n",
|
||||||
|
" scale = 0.9\n",
|
||||||
|
" max_rho_estimate = 5.0\n",
|
||||||
|
" max_u_estimate = 5.0\n",
|
||||||
|
" dx = width/nx\n",
|
||||||
|
" dy = height/ny\n",
|
||||||
|
" dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(gamma*max_rho_estimate))\n",
|
||||||
|
"\n",
|
||||||
|
" return rho, rho*u, rho*v, E, dx, dy, dt\n",
|
||||||
|
"\n",
|
||||||
|
"nx = 400\n",
|
||||||
|
"rho0, rho_u0, rho_v0, E0, dx, dy, dt = genShockBubble(nx, nx//4, 1.4)\n",
|
||||||
|
"plt.figure()\n",
|
||||||
|
"plt.imshow(rho0)\n",
|
||||||
|
"plt.colorbar(orientation='vertical', aspect=20, pad=0.02, shrink=0.3)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def genKelvinHelmholz(nx, ny, gamma, roughness=0.5):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Roughness parameter in (0, 1.0] determines how squiggly the interface betweeen the zones is\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" \n",
|
||||||
|
" def genZones(nx, ny, n):\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" Generates the zones of the two fluids of K-H\n",
|
||||||
|
" \"\"\"\n",
|
||||||
|
" zone = np.zeros((ny, nx), dtype=np.int32)\n",
|
||||||
|
"\n",
|
||||||
|
" dx = 1.0 / nx\n",
|
||||||
|
" dy = 1.0 / ny\n",
|
||||||
|
"\n",
|
||||||
|
" def genSmoothRandom(nx, n):\n",
|
||||||
|
" assert (n <= nx), \"Number of generated points nx must be larger than n\"\n",
|
||||||
|
" \n",
|
||||||
|
" if n == nx:\n",
|
||||||
|
" return np.random.random(nx)-0.5\n",
|
||||||
|
" else:\n",
|
||||||
|
" from scipy.interpolate import interp1d\n",
|
||||||
|
"\n",
|
||||||
|
" #Control points and interpolator\n",
|
||||||
|
" xp = np.linspace(0.0, 1.0, n)\n",
|
||||||
|
" yp = np.random.random(n) - 0.5\n",
|
||||||
|
" f = interp1d(xp, yp, kind='cubic')\n",
|
||||||
|
"\n",
|
||||||
|
" #Interpolation points\n",
|
||||||
|
" x = np.linspace(0.0, 1.0, nx)\n",
|
||||||
|
" return f(x)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" x = np.linspace(0, 1, nx)\n",
|
||||||
|
" y = np.linspace(0, 1, ny)\n",
|
||||||
|
"\n",
|
||||||
|
" _, y = np.meshgrid(x, y)\n",
|
||||||
|
"\n",
|
||||||
|
" #print(y+a[0])\n",
|
||||||
|
"\n",
|
||||||
|
" a = genSmoothRandom(nx, n)*dy\n",
|
||||||
|
" zone = np.where(y > 0.25+a, zone, 1)\n",
|
||||||
|
"\n",
|
||||||
|
" a = genSmoothRandom(nx, n)*dy\n",
|
||||||
|
" zone = np.where(y < 0.75+a, zone, 1)\n",
|
||||||
|
" \n",
|
||||||
|
" return zone\n",
|
||||||
|
" \n",
|
||||||
|
" width = 2.0\n",
|
||||||
|
" height = 1.0\n",
|
||||||
|
" dx = width / float(nx)\n",
|
||||||
|
" dy = height / float(ny)\n",
|
||||||
|
"\n",
|
||||||
|
" rho = np.empty((ny, nx), dtype=np.float32)\n",
|
||||||
|
" u = np.empty((ny, nx), dtype=np.float32)\n",
|
||||||
|
" v = np.zeros((ny, nx), dtype=np.float32)\n",
|
||||||
|
" p = 2.5*np.ones((ny, nx), dtype=np.float32)\n",
|
||||||
|
"\n",
|
||||||
|
" #Generate the different zones \n",
|
||||||
|
" zones = genZones(nx, ny, max(1, min(nx, int(nx*roughness))))\n",
|
||||||
|
" \n",
|
||||||
|
" #Zone 0\n",
|
||||||
|
" zone0 = zones == 0\n",
|
||||||
|
" rho = np.where(zone0, 1.0, rho)\n",
|
||||||
|
" u = np.where(zone0, 0.5, u)\n",
|
||||||
|
" \n",
|
||||||
|
" #Zone 1\n",
|
||||||
|
" zone1 = zones == 1\n",
|
||||||
|
" rho = np.where(zone1, 2.0, rho)\n",
|
||||||
|
" u = np.where(zone1, -0.5, u)\n",
|
||||||
|
" \n",
|
||||||
|
" E = 0.5*rho*(u**2+v**2) + p/(gamma-1.0)\n",
|
||||||
|
" \n",
|
||||||
|
" #Estimate dt\n",
|
||||||
|
" scale = 0.9\n",
|
||||||
|
" max_rho_estimate = 3.0\n",
|
||||||
|
" max_u_estimate = 2.0\n",
|
||||||
|
" dx = width/nx\n",
|
||||||
|
" dy = height/ny\n",
|
||||||
|
" dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(gamma*max_rho_estimate))\n",
|
||||||
|
"\n",
|
||||||
|
" return rho, rho*u, rho*v, E, dx, dy, dt\n",
|
||||||
|
"\n",
|
||||||
|
"nx = 400\n",
|
||||||
|
"rho0, rho_u0, rho_v0, E0, dx, dy, dt = genKelvinHelmholz(nx, nx//4, 1.4, 0.25)\n",
|
||||||
|
"plt.figure()\n",
|
||||||
|
"plt.imshow(rho0)\n",
|
||||||
|
"plt.colorbar(orientation='vertical', aspect=20, pad=0.02, shrink=0.3)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma):\n",
|
||||||
|
" #Construct simulator\n",
|
||||||
|
" arguments = {\n",
|
||||||
|
" 'context': my_context,\n",
|
||||||
|
" 'rho': rho0, 'rho_u': rho_u0, 'rho_v': rho_v0, 'E': E0,\n",
|
||||||
|
" 'nx': nx, 'ny': ny,\n",
|
||||||
|
" 'dx': dx, 'dy': dy, 'dt': 0.45*dt,\n",
|
||||||
|
" 'gamma': gamma\n",
|
||||||
|
" } \n",
|
||||||
|
" with Common.Timer(\"construct\") as t:\n",
|
||||||
|
" from GPUSimulators import EE2D_KP07_dimsplit\n",
|
||||||
|
" importlib.reload(EE2D_KP07_dimsplit)\n",
|
||||||
|
" sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**arguments)\n",
|
||||||
|
" print(\"Constructed in \" + str(t.secs) + \" seconds\")\n",
|
||||||
|
"\n",
|
||||||
|
" #Create netcdf file and simulate\n",
|
||||||
|
" with Common.DataDumper(outfile, mode='w', clobber=False) as outdata:\n",
|
||||||
|
" outdata.ncfile.createDimension('time', None)\n",
|
||||||
|
" outdata.ncfile.createDimension('x', nx)\n",
|
||||||
|
" outdata.ncfile.createDimension('y', ny)\n",
|
||||||
|
"\n",
|
||||||
|
" outdata.time_var = outdata.ncfile.createVariable('time', np.dtype('float32').char, 'time')\n",
|
||||||
|
" outdata.x_var = outdata.ncfile.createVariable('x', np.dtype('float32').char, 'x')\n",
|
||||||
|
" outdata.y_var = outdata.ncfile.createVariable('y', np.dtype('float32').char, 'y')\n",
|
||||||
|
" outdata.rho_var = outdata.ncfile.createVariable('rho', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
|
||||||
|
" outdata.rho_u_var = outdata.ncfile.createVariable('rho_u', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
|
||||||
|
" outdata.rho_v_var = outdata.ncfile.createVariable('rho_v', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
|
||||||
|
" outdata.E_var = outdata.ncfile.createVariable('E', np.dtype('float32').char, ('time', 'y', 'x'), zlib=True)\n",
|
||||||
|
"\n",
|
||||||
|
" outdata.x_var[:] = np.linspace(0, nx*dx, nx)\n",
|
||||||
|
" outdata.y_var[:] = np.linspace(0, ny*dy, ny)\n",
|
||||||
|
"\n",
|
||||||
|
" progress_printer = Common.ProgressPrinter(n_save, print_every=10)\n",
|
||||||
|
" for i in range(n_save+1):\n",
|
||||||
|
" #Sanity check simulator\n",
|
||||||
|
" sim.check()\n",
|
||||||
|
"\n",
|
||||||
|
" #Simulate\n",
|
||||||
|
" if (i > 0):\n",
|
||||||
|
" sim.simulate(t_end/n_save)\n",
|
||||||
|
"\n",
|
||||||
|
" #Save to file\n",
|
||||||
|
" #rho = sim.u0[0].download(sim.stream)\n",
|
||||||
|
" rho, rho_u, rho_v, E = sim.download()\n",
|
||||||
|
" outdata.time_var[i] = sim.simTime()\n",
|
||||||
|
" outdata.rho_var[i, :] = rho\n",
|
||||||
|
" outdata.rho_u_var[i, :] = rho_u\n",
|
||||||
|
" outdata.rho_v_var[i, :] = rho_v\n",
|
||||||
|
" outdata.E_var[i, :] = E\n",
|
||||||
|
"\n",
|
||||||
|
" #Write progress to screen\n",
|
||||||
|
" print_string = progress_printer.getPrintString(i)\n",
|
||||||
|
" if (print_string):\n",
|
||||||
|
" print(print_string)\n",
|
||||||
|
"\n",
|
||||||
|
" return outdata.filename "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def genSchlieren(rho):\n",
|
||||||
|
" #Compute length of z-component of normalized gradient vector \n",
|
||||||
|
" normal = np.gradient(rho) #[x, y, 1]\n",
|
||||||
|
" length = 1.0 / np.sqrt(normal[0]**2 + normal[1]**2 + 1.0**2)\n",
|
||||||
|
" schlieren = np.power(length, 128)\n",
|
||||||
|
" return schlieren\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def genCurl(rho, rho_u, rho_v):\n",
|
||||||
|
" u = rho_u / rho\n",
|
||||||
|
" v = rho_v / rho\n",
|
||||||
|
" u = np.sqrt(u**2 + v**2)\n",
|
||||||
|
" u_max = u.max()\n",
|
||||||
|
" \n",
|
||||||
|
" du_dy, du_dx = np.gradient(u)\n",
|
||||||
|
" dv_dy, dv_dx = np.gradient(v)\n",
|
||||||
|
" \n",
|
||||||
|
" curl = dv_dx - du_dy\n",
|
||||||
|
" return curl\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def genColors(rho, rho_u, rho_v, cmap, vmax, vmin):\n",
|
||||||
|
" schlieren = genSchlieren(rho)\n",
|
||||||
|
" curl = genCurl(rho, rho_u, rho_v)\n",
|
||||||
|
" \n",
|
||||||
|
" colors = Normalize(vmin, vmax, clip=True)(curl)\n",
|
||||||
|
" colors = cmap(colors)\n",
|
||||||
|
" for k in range(3):\n",
|
||||||
|
" colors[:,:,k] = colors[:,:,k]*schlieren\n",
|
||||||
|
" \n",
|
||||||
|
" return colors"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"class VisType(Enum):\n",
|
||||||
|
" Schlieren = 0\n",
|
||||||
|
" Density = 1\n",
|
||||||
|
"\n",
|
||||||
|
"def makeAnimation(infile, vis_type, vmin, vmax, save_anim=False, cmap=plt.cm.coolwarm):\n",
|
||||||
|
" fig = plt.figure(figsize=(16, 8))#, tight_layout=True)\n",
|
||||||
|
" \n",
|
||||||
|
" with Common.DataDumper(infile, 'r') as indata:\n",
|
||||||
|
" time = indata.ncfile.variables['time'][:]\n",
|
||||||
|
" x = indata.ncfile.variables['x'][:]\n",
|
||||||
|
" y = indata.ncfile.variables['y'][:]\n",
|
||||||
|
" rho = indata.ncfile.variables['rho'][0]\n",
|
||||||
|
" rho_u = indata.ncfile.variables['rho_u'][0]\n",
|
||||||
|
" rho_v = indata.ncfile.variables['rho_v'][0]\n",
|
||||||
|
" num_frames = len(time)\n",
|
||||||
|
"\n",
|
||||||
|
" ax1 = fig.gca()\n",
|
||||||
|
" extents = [0, x.max(), 0, y.max()]\n",
|
||||||
|
" \n",
|
||||||
|
" if (vis_type == VisType.Schlieren):\n",
|
||||||
|
" im = ax1.imshow(genColors(rho, rho_u, rho_v, cmap, vmax, vmin), origin='bottom', extent=extents, cmap='gray', vmin=0.0, vmax=1.0)\n",
|
||||||
|
" fig.suptitle(\"Schlieren / vorticity at t={:.2f}\".format(time[0]))\n",
|
||||||
|
" elif (vis_type == VisType.Density):\n",
|
||||||
|
" im = ax1.imshow(rho, origin='bottom', extent=extents, cmap=cmap, vmin=vmin, vmax=vmax)\n",
|
||||||
|
" fig.suptitle(\"Density at t={:.2f}\".format(time[0]))\n",
|
||||||
|
" else:\n",
|
||||||
|
" assert False, \"Wrong vis_type\"\n",
|
||||||
|
"\n",
|
||||||
|
" #Create colorbar\n",
|
||||||
|
" from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
|
||||||
|
" divider = make_axes_locatable(ax1)\n",
|
||||||
|
" ax2 = divider.append_axes(\"right\", size=0.1, pad=0.05)\n",
|
||||||
|
" cb1 = ColorbarBase(ax2, cmap=cmap,\n",
|
||||||
|
" norm=Normalize(vmin=vmin, vmax=vmax),\n",
|
||||||
|
" orientation='vertical')\n",
|
||||||
|
" \n",
|
||||||
|
" #Label colorbar\n",
|
||||||
|
" if (vis_type == VisType.Schlieren):\n",
|
||||||
|
" cb1.set_label('Vorticity')\n",
|
||||||
|
" elif (vis_type == VisType.Density):\n",
|
||||||
|
" cb1.set_label('Density')\n",
|
||||||
|
"\n",
|
||||||
|
" progress_printer = Common.ProgressPrinter(num_frames, print_every=5)\n",
|
||||||
|
" \n",
|
||||||
|
" def animate(i):\n",
|
||||||
|
" rho = indata.ncfile.variables['rho'][i]\n",
|
||||||
|
" rho_u = indata.ncfile.variables['rho_u'][i]\n",
|
||||||
|
" rho_v = indata.ncfile.variables['rho_v'][i]\n",
|
||||||
|
" \n",
|
||||||
|
" if (vis_type == VisType.Schlieren):\n",
|
||||||
|
" im.set_data(genColors(rho, rho_u, rho_v, cmap, vmax, vmin))\n",
|
||||||
|
" fig.suptitle(\"Schlieren / vorticity at t={:.2f}\".format(time[i]))\n",
|
||||||
|
" elif (vis_type == VisType.Density):\n",
|
||||||
|
" im.set_data(rho) \n",
|
||||||
|
" fig.suptitle(\"Density at t={:.2f}\".format(time[i]))\n",
|
||||||
|
" \n",
|
||||||
|
" #Write progress to screen\n",
|
||||||
|
" print_string = progress_printer.getPrintString(i)\n",
|
||||||
|
" if (print_string):\n",
|
||||||
|
" print(print_string)\n",
|
||||||
|
"\n",
|
||||||
|
" anim = animation.FuncAnimation(fig, animate, interval=50, frames=range(num_frames))\n",
|
||||||
|
" plt.close()\n",
|
||||||
|
"\n",
|
||||||
|
" if (save_anim):\n",
|
||||||
|
" root, _ = os.path.splitext(infile)\n",
|
||||||
|
" movie_outpath = os.path.abspath(root + \".mp4\")\n",
|
||||||
|
" if (os.path.isfile(movie_outpath)):\n",
|
||||||
|
" print(\"Reusing previously created movie \" + movie_outpath)\n",
|
||||||
|
" else:\n",
|
||||||
|
" print(\"Creating movie \" + movie_outpath)\n",
|
||||||
|
" from matplotlib.animation import FFMpegFileWriter\n",
|
||||||
|
" writer = FFMpegFileWriter(fps=25)\n",
|
||||||
|
" anim.save(movie_outpath, writer=writer)\n",
|
||||||
|
" display(HTML(\"\"\"\n",
|
||||||
|
" <div align=\"middle\">\n",
|
||||||
|
" <video width=\"80%\" controls>\n",
|
||||||
|
" <source src=\"{:s}\" type=\"video/mp4\">\n",
|
||||||
|
" </video>\n",
|
||||||
|
" </div>\n",
|
||||||
|
" \"\"\".format(movie_outpath)))\n",
|
||||||
|
" else:\n",
|
||||||
|
" #plt.rcParams[\"animation.html\"] = \"html5\"\n",
|
||||||
|
" plt.rcParams[\"animation.html\"] = \"jshtml\"\n",
|
||||||
|
" display(anim)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"nx = 1600\n",
|
||||||
|
"ny = nx//4\n",
|
||||||
|
"gamma = 1.4\n",
|
||||||
|
"t_end = 3.0\n",
|
||||||
|
"n_save = 200\n",
|
||||||
|
"outfile = \"data/euler_shock-bubble.nc\"\n",
|
||||||
|
"outdata = None\n",
|
||||||
|
"\n",
|
||||||
|
"rho0, rho_u0, rho_v0, E0, dx, dy, dt = genShockBubble(nx, ny, gamma)\n",
|
||||||
|
"outfile = makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"#makeAnimation('data/euler_simdata_0000.nc')\n",
|
||||||
|
"makeAnimation(outfile, vis_type=VisType.Schlieren, vmin=-0.2, vmax=0.2, cmap=plt.cm.RdBu)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"nx = 1600\n",
|
||||||
|
"ny = nx//2\n",
|
||||||
|
"gamma = 1.4\n",
|
||||||
|
"roughness = 0.125\n",
|
||||||
|
"t_end = 2.0#10.0\n",
|
||||||
|
"n_save = 100#1000\n",
|
||||||
|
"outfile = \"data/euler_kelvin_helmholz.nc\"\n",
|
||||||
|
"outdata = None\n",
|
||||||
|
"\n",
|
||||||
|
"rho0, rho_u0, rho_v0, E0, dx, dy, dt = genKelvinHelmholz(nx, ny, gamma, roughness)\n",
|
||||||
|
"outfile = makeSimulation(outfile, t_end, rho0, rho_u0, rho_v0, E0, nx, ny, dx, dy, dt, gamma)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"makeAnimation(outfile, vis_type=VisType.Density, vmin=1.0, vmax=2.0)\n",
|
||||||
|
"#makeAnimation('data/euler_kelvin_helmholz_0012.nc', cmap=plt.cm.coolwarm, save_anim=True, vmin=1.1, vmax=1.9)\n",
|
||||||
|
"#makeAnimation('data/euler_kelvin_helmholz_0011.nc', save_anim=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"makeAnimation(outfile, vis_type=VisType.Schlieren, vmin=-0.2, vmax=0.2)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python [conda env:anaconda3]",
|
||||||
|
"language": "python",
|
||||||
|
"name": "conda-env-anaconda3-py"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -29,16 +29,17 @@ import io
|
|||||||
import hashlib
|
import hashlib
|
||||||
import logging
|
import logging
|
||||||
import gc
|
import gc
|
||||||
|
import netCDF4
|
||||||
|
|
||||||
import pycuda.compiler as cuda_compiler
|
import pycuda.compiler as cuda_compiler
|
||||||
import pycuda.gpuarray
|
import pycuda.gpuarray
|
||||||
import pycuda.driver as cuda
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
Class which keeps track of time spent for a section of code
|
|
||||||
"""
|
|
||||||
class Timer(object):
|
class Timer(object):
|
||||||
|
"""
|
||||||
|
Class which keeps track of time spent for a section of code
|
||||||
|
"""
|
||||||
def __init__(self, tag, log_level=logging.DEBUG):
|
def __init__(self, tag, log_level=logging.DEBUG):
|
||||||
self.tag = tag
|
self.tag = tag
|
||||||
self.log_level = log_level
|
self.log_level = log_level
|
||||||
@ -63,9 +64,116 @@ class Timer(object):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class DataDumper(object):
|
||||||
|
"""
|
||||||
|
Simple class for holding a netCDF4 object
|
||||||
|
(handles opening and closing in a nice way)
|
||||||
|
Use as
|
||||||
|
with DataDumper("filename") as data:
|
||||||
|
...
|
||||||
|
"""
|
||||||
|
def __init__(self, filename, *args, **kwargs):
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
#Create directory if needed
|
||||||
|
dirname = os.path.dirname(filename)
|
||||||
|
if not os.path.isdir(dirname):
|
||||||
|
self.logger.info("Creating directory " + dirname)
|
||||||
|
os.makedirs(dirname)
|
||||||
|
|
||||||
|
#Get mode of file if we have that
|
||||||
|
mode = None
|
||||||
|
if (args):
|
||||||
|
mode = args[0]
|
||||||
|
elif (kwargs and 'mode' in kwargs.keys()):
|
||||||
|
mode = kwargs['mode']
|
||||||
|
|
||||||
|
#Create new unique file if writing
|
||||||
|
if (mode):
|
||||||
|
if (("w" in mode) or ("+" in mode) or ("a" in mode)):
|
||||||
|
i = 0
|
||||||
|
stem, ext = os.path.splitext(filename)
|
||||||
|
while (os.path.isfile(filename)):
|
||||||
|
filename = "{:s}_{:04d}{:s}".format(stem, i, ext)
|
||||||
|
i = i+1
|
||||||
|
self.filename = os.path.abspath(filename)
|
||||||
|
|
||||||
|
#Save arguments
|
||||||
|
self.args = args
|
||||||
|
self.kwargs = kwargs
|
||||||
|
|
||||||
|
#Log output
|
||||||
|
self.logger.info("Writing output to " + self.filename)
|
||||||
|
|
||||||
|
|
||||||
|
def __enter__(self):
|
||||||
|
self.logger.info("Opening " + self.filename)
|
||||||
|
if (self.args):
|
||||||
|
self.logger.info("Arguments: " + str(self.args))
|
||||||
|
if (self.kwargs):
|
||||||
|
self.logger.info("Keyword arguments: " + str(self.kwargs))
|
||||||
|
self.ncfile = netCDF4.Dataset(self.filename, *self.args, **self.kwargs)
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __exit__(self, *args):
|
||||||
|
self.logger.info("Closing " + self.filename)
|
||||||
|
self.ncfile.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class ProgressPrinter(object):
|
||||||
|
"""
|
||||||
|
Small helper class for
|
||||||
|
"""
|
||||||
|
def __init__(self, total_steps, print_every=5):
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
self.start = time.time()
|
||||||
|
self.total_steps = total_steps
|
||||||
|
self.print_every = print_every
|
||||||
|
self.next_print_time = self.print_every
|
||||||
|
self.last_step = 0
|
||||||
|
self.secs_per_iter = None
|
||||||
|
|
||||||
|
def getPrintString(self, step):
|
||||||
|
elapsed = time.time() - self.start
|
||||||
|
if (elapsed > self.next_print_time):
|
||||||
|
dt = elapsed - (self.next_print_time - self.print_every)
|
||||||
|
dsteps = step - self.last_step
|
||||||
|
steps_remaining = self.total_steps - step
|
||||||
|
|
||||||
|
if (dsteps == 0):
|
||||||
|
return
|
||||||
|
|
||||||
|
self.last_step = step
|
||||||
|
self.next_print_time = elapsed + self.print_every
|
||||||
|
|
||||||
|
if not self.secs_per_iter:
|
||||||
|
self.secs_per_iter = dt / dsteps
|
||||||
|
self.secs_per_iter = 0.2*self.secs_per_iter + 0.8*(dt / dsteps)
|
||||||
|
|
||||||
|
remaining_time = steps_remaining * self.secs_per_iter
|
||||||
|
|
||||||
|
return "{:s}. Total: {:s}, elapsed: {:s}, remaining: {:s}".format(
|
||||||
|
ProgressPrinter.progressBar(step, self.total_steps),
|
||||||
|
ProgressPrinter.timeString(elapsed + remaining_time),
|
||||||
|
ProgressPrinter.timeString(elapsed),
|
||||||
|
ProgressPrinter.timeString(remaining_time))
|
||||||
|
|
||||||
|
def timeString(seconds):
|
||||||
|
seconds = int(max(seconds, 1))
|
||||||
|
minutes, seconds = divmod(seconds, 60)
|
||||||
|
hours, minutes = divmod(minutes, 60)
|
||||||
|
periods = [('h', hours), ('m', minutes), ('s', seconds)]
|
||||||
|
time_string = ' '.join('{}{}'.format(value, name)
|
||||||
|
for name, value in periods
|
||||||
|
if value)
|
||||||
|
return time_string
|
||||||
|
|
||||||
|
def progressBar(step, total_steps, width=30):
|
||||||
|
progress = np.round(width * step / total_steps).astype(np.int32)
|
||||||
|
progressbar = "0% [" + "#"*(progress) + "="*(width-progress) + "] 100%"
|
||||||
|
return progressbar
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -113,6 +113,7 @@ class EE2D_KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0])
|
self.u1[3].data.gpudata, self.u1[3].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
@ -131,6 +132,7 @@ class EE2D_KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0])
|
self.u1[3].data.gpudata, self.u1[3].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.u0.download(self.stream)
|
return self.u0.download(self.stream)
|
||||||
|
@ -105,6 +105,7 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.u0.download(self.stream)
|
return self.u0.download(self.stream)
|
@ -100,6 +100,7 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.u0.download(self.stream)
|
return self.u0.download(self.stream)
|
@ -110,6 +110,7 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
@ -126,6 +127,7 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.u0.download(self.stream)
|
return self.u0.download(self.stream)
|
@ -110,6 +110,7 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
def stepEuler(self, dt):
|
def stepEuler(self, dt):
|
||||||
self.substepRK(dt, 0)
|
self.substepRK(dt, 0)
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def stepRK(self, dt, order):
|
def stepRK(self, dt, order):
|
||||||
if (order != 2):
|
if (order != 2):
|
||||||
@ -117,6 +118,7 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
self.substepRK(dt, 0)
|
self.substepRK(dt, 0)
|
||||||
self.substepRK(dt, 1)
|
self.substepRK(dt, 1)
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.u0.download(self.stream)
|
return self.u0.download(self.stream)
|
@ -110,6 +110,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
@ -126,6 +127,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.u0.download(self.stream)
|
return self.u0.download(self.stream)
|
@ -101,6 +101,7 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.u0.download(self.stream)
|
return self.u0.download(self.stream)
|
||||||
|
@ -80,8 +80,9 @@ class BaseSimulator:
|
|||||||
#Create a CUDA stream
|
#Create a CUDA stream
|
||||||
self.stream = cuda.Stream()
|
self.stream = cuda.Stream()
|
||||||
|
|
||||||
#Keep track of simulation time
|
#Keep track of simulation time and number of timesteps
|
||||||
self.t = 0.0;
|
self.t = 0.0
|
||||||
|
self.nt = 0
|
||||||
|
|
||||||
#Log progress every n seconds during simulation
|
#Log progress every n seconds during simulation
|
||||||
self.log_every = 5
|
self.log_every = 5
|
||||||
@ -209,14 +210,16 @@ class BaseSimulator:
|
|||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||||
|
|
||||||
def check(self):
|
|
||||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
|
||||||
|
|
||||||
def sim_time(self):
|
|
||||||
return self.t
|
|
||||||
|
|
||||||
def synchronize(self):
|
def synchronize(self):
|
||||||
self.stream.synchronize()
|
self.stream.synchronize()
|
||||||
|
|
||||||
|
def check(self):
|
||||||
|
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||||
|
|
||||||
|
def simTime(self):
|
||||||
|
return self.t
|
||||||
|
|
||||||
|
def simSteps(self):
|
||||||
|
return self.nt
|
||||||
|
|
||||||
|
@ -103,6 +103,7 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
|
||||||
@ -118,6 +119,7 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
self.nt += 1
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.u0.download(self.stream)
|
return self.u0.download(self.stream)
|
@ -154,10 +154,10 @@ __global__ void KP07DimsplitKernel(
|
|||||||
|
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
readBlock<w, h, gc>( rho0_ptr_, rho0_pitch_, Q[0], nx_+4, ny_+4);
|
readBlock<w, h, gc>( rho0_ptr_, rho0_pitch_, Q[0], nx_, ny_);
|
||||||
readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_+4, ny_+4);
|
readBlock<w, h, gc>(rho_u0_ptr_, rho_u0_pitch_, Q[1], nx_, ny_);
|
||||||
readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_+4, ny_+4);
|
readBlock<w, h, gc>(rho_v0_ptr_, rho_v0_pitch_, Q[2], nx_, ny_);
|
||||||
readBlock<w, h, gc>( E0_ptr_, E0_pitch_, Q[3], nx_+4, ny_+4);
|
readBlock<w, h, gc>( E0_ptr_, E0_pitch_, Q[3], nx_, ny_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Fix boundary conditions
|
//Fix boundary conditions
|
||||||
@ -226,6 +226,26 @@ __global__ void KP07DimsplitKernel(
|
|||||||
|
|
||||||
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
evolveF<w, h, gc, vars>(Q, F, dx_, dt_);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
|
//This is the RK2-part
|
||||||
|
const int tx = threadIdx.x + gc;
|
||||||
|
const int ty = threadIdx.y + gc;
|
||||||
|
const float q1 = Q[0][ty][tx];
|
||||||
|
const float q2 = Q[1][ty][tx];
|
||||||
|
const float q3 = Q[2][ty][tx];
|
||||||
|
const float q4 = Q[3][ty][tx];
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
readBlock<w, h, gc>( rho1_ptr_, rho1_pitch_, Q[0], nx_, ny_);
|
||||||
|
readBlock<w, h, gc>(rho_u1_ptr_, rho_u1_pitch_, Q[1], nx_, ny_);
|
||||||
|
readBlock<w, h, gc>(rho_v1_ptr_, rho_v1_pitch_, Q[2], nx_, ny_);
|
||||||
|
readBlock<w, h, gc>( E1_ptr_, E1_pitch_, Q[3], nx_, ny_);
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
|
Q[0][ty][tx] = 0.5f*( Q[0][ty][tx] + q1 );
|
||||||
|
Q[1][ty][tx] = 0.5f*( Q[1][ty][tx] + q2 );
|
||||||
|
Q[2][ty][tx] = 0.5f*( Q[2][ty][tx] + q3 );
|
||||||
|
Q[3][ty][tx] = 0.5f*( Q[3][ty][tx] + q4 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -92,19 +92,29 @@ __device__ float desingularize(float x_, float eps_) {
|
|||||||
template<int block_width, int block_height, int ghost_cells>
|
template<int block_width, int block_height, int ghost_cells>
|
||||||
inline __device__ void readBlock(float* ptr_, int pitch_,
|
inline __device__ void readBlock(float* ptr_, int pitch_,
|
||||||
float shmem[block_height+2*ghost_cells][block_width+2*ghost_cells],
|
float shmem[block_height+2*ghost_cells][block_width+2*ghost_cells],
|
||||||
const int max_x_, const int max_y_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of block within domain
|
//Index of block within domain
|
||||||
const int bx = blockDim.x * blockIdx.x;
|
const int bx = blockDim.x * blockIdx.x;
|
||||||
const int by = blockDim.y * blockIdx.y;
|
const int by = blockDim.y * blockIdx.y;
|
||||||
|
|
||||||
|
const int gc_pad = 4;
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
//Loop over all variables
|
//Loop over all variables
|
||||||
for (int j=threadIdx.y; j<block_height+2*ghost_cells; j+=block_height) {
|
for (int j=threadIdx.y; j<block_height+2*ghost_cells; j+=block_height) {
|
||||||
const int l = min(by + j, max_y_-1);
|
//const int l = min(by + j, ny_+2*ghost_cells-1);
|
||||||
|
const int y = by + j;
|
||||||
|
const int y_offset = ( (int) (y < gc_pad) - (int) (y >= ny_+2*ghost_cells-gc_pad) ) * (ny_+2*ghost_cells - 2*gc_pad);
|
||||||
|
const int l = y + y_offset;
|
||||||
float* row = (float*) ((char*) ptr_ + pitch_*l);
|
float* row = (float*) ((char*) ptr_ + pitch_*l);
|
||||||
|
|
||||||
for (int i=threadIdx.x; i<block_width+2*ghost_cells; i+=block_width) {
|
for (int i=threadIdx.x; i<block_width+2*ghost_cells; i+=block_width) {
|
||||||
const int k = min(bx + i, max_x_-1);
|
//const int k = min(bx + i, nx_+2*ghost_cells-1);
|
||||||
|
|
||||||
|
const int x = bx + i;
|
||||||
|
const int gc_pad = 4;
|
||||||
|
const int x_offset = ( (int) (x < gc_pad) - (int) (x >= nx_+2*ghost_cells-gc_pad) ) * (nx_+2*ghost_cells - 2*gc_pad);
|
||||||
|
const int k = x + x_offset;
|
||||||
|
|
||||||
shmem[j][i] = row[k];
|
shmem[j][i] = row[k];
|
||||||
}
|
}
|
||||||
@ -248,6 +258,16 @@ __device__ void noFlowBoundary(float Q[block_height+2*ghost_cells][block_width+2
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template<int block_width, int block_height, int ghost_cells, int vars>
|
template<int block_width, int block_height, int ghost_cells, int vars>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user