mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 06:24:13 +02:00
495 lines
53 KiB
Plaintext
495 lines
53 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#Lets have matplotlib \"inline\"\n",
|
|
"%matplotlib inline\n",
|
|
"\n",
|
|
"# Add line profiler\n",
|
|
"%load_ext line_profiler\n",
|
|
"\n",
|
|
"#Import packages we need\n",
|
|
"import numpy as np\n",
|
|
"from matplotlib import animation, rc\n",
|
|
"from matplotlib import pyplot as plt\n",
|
|
"\n",
|
|
"import subprocess\n",
|
|
"import os\n",
|
|
"import gc\n",
|
|
"import datetime\n",
|
|
"import importlib\n",
|
|
"\n",
|
|
"import pycuda.driver as cuda\n",
|
|
"\n",
|
|
"try:\n",
|
|
" from StringIO import StringIO\n",
|
|
"except ImportError:\n",
|
|
" from io import StringIO\n",
|
|
"\n",
|
|
"#Set large figure sizes\n",
|
|
"#Note, this prevents nice figures for articles...\n",
|
|
"rc('figure', figsize=(16.0, 12.0))\n",
|
|
"rc('animation', html='html5')\n",
|
|
"\n",
|
|
"#Finally, import our simulator\n",
|
|
"#from SWESimulators import FBL, CTCS, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, CDKLM16, DataOutput, PlotHelper"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CUDA version (9, 1, 0)\n",
|
|
"Driver version 9010\n",
|
|
"Using GeForce 840M\n",
|
|
" => compute capability: (5, 0)\n",
|
|
" => memory: 2048.0 MB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"class CudaContext(object):\n",
"    \"\"\"Creates a CUDA context on device 0 and releases it when the object is deleted.\n",
"\n",
"    verbose:  print version/device information and warnings\n",
"    blocking: create the context with SCHED_BLOCKING_SYNC instead of SCHED_AUTO\n",
"    \"\"\"\n",
"    def __init__(self, verbose=True, blocking=False):\n",
"        self.verbose = verbose\n",
"        self.blocking = blocking\n",
"        # Assigned up front so that __del__ is safe even if the setup below fails\n",
"        self.cuda_context = None\n",
"\n",
"        cuda.init(flags=0)\n",
"\n",
"        # Pop a context that may have been left current (e.g. by re-running this cell)\n",
"        try:\n",
"            cuda.Context.pop()\n",
"        except cuda.Error:\n",
"            # No context to pop; only CUDA errors are ignored here so that\n",
"            # e.g. KeyboardInterrupt is no longer swallowed\n",
"            pass\n",
"        else:\n",
"            if (self.verbose):\n",
"                print(\"=== WARNING ===\")\n",
"                print(\"Popped existing context\")\n",
"                print(\"=== WARNING ===\")\n",
"\n",
"        if (self.verbose):\n",
"            print(\"CUDA version \" + str(cuda.get_version()))\n",
"            print(\"Driver version \" + str(cuda.get_driver_version()))\n",
"\n",
"        self.cuda_device = cuda.Device(0)\n",
"        if (self.verbose):\n",
"            print(\"Using \" + self.cuda_device.name())\n",
"            print(\" => compute capability: \" + str(self.cuda_device.compute_capability()))\n",
"            print(\" => memory: \" + str(self.cuda_device.total_memory() / (1024*1024)) + \" MB\")\n",
"\n",
"        if (self.blocking):\n",
"            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_BLOCKING_SYNC)\n",
"            if (self.verbose):\n",
"                print(\"=== WARNING ===\")\n",
"                print(\"Using blocking context\")\n",
"                print(\"=== WARNING ===\")\n",
"        else:\n",
"            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)\n",
"\n",
"\n",
"    def __del__(self, *args):\n",
"        # __init__ may have raised before a context was created; nothing to release then\n",
"        if getattr(self, \"cuda_context\", None) is None:\n",
"            return\n",
"\n",
"        if self.verbose:\n",
"            print(\"Cleaning up CUDA context\")\n",
"\n",
"        self.cuda_context.detach()\n",
"        cuda.Context.pop()\n",
"\n",
"\n",
"my_context = CudaContext(verbose=True, blocking=False)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"=> sleep 125.088930 ms\n",
|
|
"=> elapsed time: 0.125089 s\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"import time\n",
"\n",
"class Timer(object):\n",
"    \"\"\"Context manager that measures the time spent inside its with-block.\n",
"\n",
"    tag:     label used in the printed report\n",
"    verbose: when True, the elapsed milliseconds are printed on exit\n",
"\n",
"    After the block has exited, the elapsed time is available as .secs and .msecs.\n",
"    \"\"\"\n",
"    def __init__(self, tag, verbose=True):\n",
"        self.verbose = verbose\n",
"        self.tag = tag\n",
"\n",
"    def __enter__(self):\n",
"        # perf_counter() is monotonic and uses the highest resolution clock available;\n",
"        # time.time() can be too coarse for short intervals and may jump with the system clock.\n",
"        # Note: start/end are therefore only meaningful as a difference, not as timestamps\n",
"        self.start = time.perf_counter()\n",
"        return self\n",
"\n",
"    def __exit__(self, *args):\n",
"        self.end = time.perf_counter()\n",
"        self.secs = self.end - self.start\n",
"        self.msecs = self.secs * 1000 # millisecs\n",
"        if self.verbose:\n",
"            print(\"=> \" + self.tag + ' %f ms' % self.msecs)\n",
"\n",
"# Quick self-check: time a 125 ms sleep\n",
"with Timer(\"sleep\", verbose=True) as t:\n",
"    time.sleep(0.125)\n",
"\n",
"print(\"=> elapsed time: %f s\" % t.secs)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"def gen_test_data(nx, ny, num_ghost_cells):\n",
"    \"\"\"Generates a smooth gaussian bump as initial conditions on a 100 x 100 domain.\n",
"\n",
"    nx, ny:          number of interior cells in x and y\n",
"    num_ghost_cells: number of ghost cells added on every side\n",
"\n",
"    Returns (h, hu, hv, dx, dy, nx, ny), where h, hu and hv are float32 arrays of\n",
"    shape (ny + 2*num_ghost_cells, nx + 2*num_ghost_cells).\n",
"    \"\"\"\n",
"    width = 100.0\n",
"    height = width\n",
"    dx = width / float(nx)\n",
"    dy = height / float(ny)\n",
"\n",
"    #Create a gaussian \"dam break\" that will not form shocks\n",
"    x_center = dx*nx/2.0\n",
"    y_center = dy*ny/2.0\n",
"    size = width\n",
"\n",
"    #Cell center coordinates (ghost cells included), relative to the domain center\n",
"    x = dx*(np.arange(-num_ghost_cells, nx+num_ghost_cells) + 0.5) - x_center\n",
"    y = dy*(np.arange(-num_ghost_cells, ny+num_ghost_cells) + 0.5) - y_center\n",
"\n",
"    #Evaluate the gaussian once for the whole grid: rows follow y, columns follow x\n",
"    bump = np.exp(-(x[np.newaxis, :]**2/size + y[:, np.newaxis]**2/size))\n",
"\n",
"    h = (0.5 + 0.1*bump).astype(np.float32)\n",
"    hu = (0.1*bump).astype(np.float32)\n",
"    #hv has the same values as hu, but must be an independent array\n",
"    hv = hu.copy()\n",
"\n",
"    return h, hu, hv, dx, dy, nx, ny"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#%lprun -f gen_test_data gen_test_data(100, 150, 2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"=> upload (async) 8.011341 ms\n",
|
|
"=> download (async) 20.012617 ms\n",
|
|
"=> sync 0.000000 ms\n",
|
|
"Sum of absolute difference: 0.0\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"from SWESimulators import Common\n",
"importlib.reload(Common)\n",
"\n",
"# Interior size and halo widths of the test array\n",
"nx = 1000\n",
"ny = 1500\n",
"nx_halo = 2\n",
"ny_halo = 3\n",
"a = np.random.rand(ny+2*ny_halo, nx+2*nx_halo).astype(np.float32)\n",
"\n",
"# pycuda.driver is already imported as cuda in the first cell\n",
"stream = cuda.Stream()\n",
"\n",
"with Timer(\"upload (async)\", verbose=True) as t:\n",
"    a_gpu = Common.CUDAArray2D(stream, nx, ny, nx_halo, ny_halo, a)\n",
"\n",
"# NOTE(review): \"async\" is a reserved keyword from Python 3.7 on, so this call only\n",
"# parses on Python <= 3.6; it can only be renamed together with the parameter in Common\n",
"with Timer(\"download (async)\", verbose=True) as t:\n",
"    b = a_gpu.download(stream, async=True)\n",
"\n",
"# Synchronize before comparing, since the download above was requested as asynchronous\n",
"with Timer(\"sync\", verbose=True) as t:\n",
"    cuda.Context.synchronize()\n",
"\n",
"print(\"Sum of absolute difference: \", np.sum(np.abs(a-b)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"=> compile 6411.859989 ms\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"with Timer(\"compile\", verbose=True) as t:\n",
|
|
" module = Common.get_kernel(\"FORCE_kernel.cu\", 16, 16)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#%lprun -f Common.get_kernel Common.get_kernel(\"FORCE_kernel.cu\", 16, 16)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"=> construct 4934.113741 ms\n",
|
|
"=> step 14.986992 ms\n",
|
|
"=> download 2.002239 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<matplotlib.colorbar.Colorbar at 0x94b81290f0>"
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 4 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
"from SWESimulators import LxF\n",
"importlib.reload(LxF)\n",
"\n",
"# Problem setup\n",
"nx = 10\n",
"ny = 15\n",
"num_ghost_cells = 1\n",
"dt = 0.1\n",
"g = 9.81\n",
"\n",
"h0, hu0, hv0, dx, dy, nx, ny = gen_test_data(nx, ny, num_ghost_cells)\n",
"\n",
"# Left panel: initial h\n",
"plt.figure()\n",
"plt.subplot(121)\n",
"plt.imshow(h0)\n",
"plt.colorbar()\n",
"\n",
"with Timer(\"construct\"):\n",
"    sim = LxF.LxF(h0, hu0, hv0,\n",
"                  nx, ny,\n",
"                  dx, dy, dt,\n",
"                  g)\n",
"\n",
"# The Timer is left unbound so that t unambiguously holds the value returned by step()\n",
"# NOTE(review): 10.0 is presumably the simulation time to advance - confirm against LxF.step\n",
"with Timer(\"step\"):\n",
"    t = sim.step(10.0)\n",
"\n",
"with Timer(\"download\"):\n",
"    h1, hu1, hv1 = sim.download()\n",
"\n",
"# Right panel: h after stepping; the trailing semicolon suppresses the Colorbar repr\n",
"plt.subplot(122)\n",
"plt.imshow(h1)\n",
"plt.colorbar();"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"=> construct 4916.617155 ms\n",
|
|
"=> step 118.532658 ms\n",
|
|
"=> download 0.992298 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<matplotlib.colorbar.Colorbar at 0x94b88cda58>"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 4 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
"from SWESimulators import FORCE\n",
"importlib.reload(FORCE)\n",
"\n",
"# Problem setup\n",
"nx = 10\n",
"ny = 15\n",
"num_ghost_cells = 1\n",
"dt = 0.01\n",
"g = 9.81\n",
"\n",
"h0, hu0, hv0, dx, dy, nx, ny = gen_test_data(nx, ny, num_ghost_cells)\n",
"\n",
"# Left panel: initial h\n",
"plt.figure()\n",
"plt.subplot(121)\n",
"plt.imshow(h0)\n",
"plt.colorbar()\n",
"\n",
"with Timer(\"construct\"):\n",
"    sim = FORCE.FORCE(h0, hu0, hv0,\n",
"                      nx, ny,\n",
"                      dx, dy, dt,\n",
"                      g)\n",
"\n",
"# The Timer is left unbound so that t unambiguously holds the value returned by step()\n",
"# NOTE(review): 10.0 is presumably the simulation time to advance - confirm against FORCE.step\n",
"with Timer(\"step\"):\n",
"    t = sim.step(10.0)\n",
"\n",
"with Timer(\"download\"):\n",
"    h1, hu1, hv1 = sim.download()\n",
"\n",
"# Right panel: h after stepping; the trailing semicolon suppresses the Colorbar repr\n",
"plt.subplot(122)\n",
"plt.imshow(h1)\n",
"plt.colorbar();"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {
|
|
"scrolled": false
|
|
},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"=> construct 4879.117727 ms\n",
|
|
"=> step 109.936714 ms\n",
|
|
"=> download 2.000093 ms\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<matplotlib.colorbar.Colorbar at 0x94ba01c3c8>"
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 432x288 with 4 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
"from SWESimulators import HLL\n",
"importlib.reload(HLL)\n",
"\n",
"# Problem setup\n",
"nx = 10\n",
"ny = 15\n",
"num_ghost_cells = 1\n",
"dt = 0.01\n",
"g = 9.81\n",
"\n",
"h0, hu0, hv0, dx, dy, nx, ny = gen_test_data(nx, ny, num_ghost_cells)\n",
"\n",
"# Left panel: initial h\n",
"plt.figure()\n",
"plt.subplot(121)\n",
"plt.imshow(h0)\n",
"plt.colorbar()\n",
"\n",
"with Timer(\"construct\"):\n",
"    sim = HLL.HLL(h0, hu0, hv0,\n",
"                  nx, ny,\n",
"                  dx, dy, dt,\n",
"                  g)\n",
"\n",
"# The Timer is left unbound so that t unambiguously holds the value returned by step()\n",
"# NOTE(review): 10.0 is presumably the simulation time to advance - confirm against HLL.step\n",
"with Timer(\"step\"):\n",
"    t = sim.step(10.0)\n",
"\n",
"with Timer(\"download\"):\n",
"    h1, hu1, hv1 = sim.download()\n",
"\n",
"# Right panel: h after stepping; the trailing semicolon suppresses the Colorbar repr\n",
"plt.subplot(122)\n",
"plt.imshow(h1)\n",
"plt.colorbar();"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|