mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-07-04 05:41:00 +02:00
refactor(kernel): split Common.py to a separate package
This commit is contained in:
parent
8f24cd45ea
commit
c54f08c417
@ -27,16 +27,11 @@
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
|
||||
"\n",
|
||||
"import subprocess\n",
|
||||
"import os\n",
|
||||
"import gc\n",
|
||||
"import datetime\n",
|
||||
"import importlib\n",
|
||||
"import logging\n",
|
||||
"from socket import gethostname\n",
|
||||
"\n",
|
||||
"import pycuda.driver as cuda\n",
|
||||
"import pycuda.compiler\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" from StringIO import StringIO\n",
|
||||
@ -55,7 +50,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from GPUSimulators import Common, IPythonMagic, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, Autotuner"
|
||||
"from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, Autotuner\n",
|
||||
"from GPUSimulators.common import common"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -124,14 +120,14 @@
|
||||
"evalue": "All-NaN slice encountered",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[9], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m simulators \u001b[38;5;241m=\u001b[39m [LxF\u001b[38;5;241m.\u001b[39mLxF, FORCE\u001b[38;5;241m.\u001b[39mFORCE, HLL\u001b[38;5;241m.\u001b[39mHLL, HLL2\u001b[38;5;241m.\u001b[39mHLL2, KP07\u001b[38;5;241m.\u001b[39mKP07, KP07_dimsplit\u001b[38;5;241m.\u001b[39mKP07_dimsplit, WAF\u001b[38;5;241m.\u001b[39mWAF]\n\u001b[0;32m----> 2\u001b[0m peak_performance \u001b[38;5;241m=\u001b[39m [autotuner\u001b[38;5;241m.\u001b[39mget_peak_performance(simulator) \u001b[38;5;28;01mfor\u001b[39;00m simulator \u001b[38;5;129;01min\u001b[39;00m simulators]\n\u001b[1;32m 3\u001b[0m megacells \u001b[38;5;241m=\u001b[39m [performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmegacells\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m performance \u001b[38;5;129;01min\u001b[39;00m peak_performance]\n\u001b[1;32m 4\u001b[0m xlabels \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{:s}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m[\u001b[39m\u001b[38;5;132;01m{:d}\u001b[39;00m\u001b[38;5;124mx\u001b[39m\u001b[38;5;132;01m{:d}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(simulators[i]\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m'\u001b[39m], performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_height\u001b[39m\u001b[38;5;124m'\u001b[39m]) \u001b[38;5;28;01mfor\u001b[39;00m i, performance \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(peak_performance)]\n",
|
||||
"Cell \u001b[0;32mIn[9], line 2\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 1\u001b[0m simulators \u001b[38;5;241m=\u001b[39m [LxF\u001b[38;5;241m.\u001b[39mLxF, FORCE\u001b[38;5;241m.\u001b[39mFORCE, HLL\u001b[38;5;241m.\u001b[39mHLL, HLL2\u001b[38;5;241m.\u001b[39mHLL2, KP07\u001b[38;5;241m.\u001b[39mKP07, KP07_dimsplit\u001b[38;5;241m.\u001b[39mKP07_dimsplit, WAF\u001b[38;5;241m.\u001b[39mWAF]\n\u001b[0;32m----> 2\u001b[0m peak_performance \u001b[38;5;241m=\u001b[39m [\u001b[43mautotuner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_peak_performance\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimulator\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m simulator \u001b[38;5;129;01min\u001b[39;00m simulators]\n\u001b[1;32m 3\u001b[0m megacells \u001b[38;5;241m=\u001b[39m [performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmegacells\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m performance \u001b[38;5;129;01min\u001b[39;00m peak_performance]\n\u001b[1;32m 4\u001b[0m xlabels \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{:s}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m[\u001b[39m\u001b[38;5;132;01m{:d}\u001b[39;00m\u001b[38;5;124mx\u001b[39m\u001b[38;5;132;01m{:d}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(simulators[i]\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m'\u001b[39m], performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_height\u001b[39m\u001b[38;5;124m'\u001b[39m]) \u001b[38;5;28;01mfor\u001b[39;00m i, performance \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(peak_performance)]\n",
|
||||
"File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:132\u001b[0m, in \u001b[0;36mAutotuner.get_peak_performance\u001b[0;34m(self, simulator)\u001b[0m\n\u001b[1;32m 130\u001b[0m block_widths \u001b[38;5;241m=\u001b[39m data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_block_widths\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 131\u001b[0m block_heights \u001b[38;5;241m=\u001b[39m data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_block_heights\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m--> 132\u001b[0m j, i \u001b[38;5;241m=\u001b[39m \u001b[43mfind_max_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmegacells\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 134\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mperformance[key] \u001b[38;5;241m=\u001b[39m { \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m\"\u001b[39m: block_widths[i],\n\u001b[1;32m 135\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_height\u001b[39m\u001b[38;5;124m\"\u001b[39m: block_heights[j],\n\u001b[1;32m 136\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmegacells\u001b[39m\u001b[38;5;124m\"\u001b[39m: megacells[j, i] }\n\u001b[1;32m 137\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReturning \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m as peak performance parameters\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mperformance[key])\n",
|
||||
"File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:126\u001b[0m, in \u001b[0;36mAutotuner.get_peak_performance.<locals>.find_max_index\u001b[0;34m(megacells)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfind_max_index\u001b[39m(megacells):\n\u001b[0;32m--> 126\u001b[0m max_index \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnanargmax\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmegacells\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39munravel_index(max_index, megacells\u001b[38;5;241m.\u001b[39mshape)\n",
|
||||
"File \u001b[0;32m~/.conda/envs/ShallowWaterGPU/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:613\u001b[0m, in \u001b[0;36mnanargmax\u001b[0;34m(a, axis, out, keepdims)\u001b[0m\n\u001b[1;32m 611\u001b[0m mask \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mall(mask, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m 612\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39many(mask):\n\u001b[0;32m--> 613\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll-NaN slice encountered\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 614\u001b[0m res \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39margmax(a, axis\u001b[38;5;241m=\u001b[39maxis, out\u001b[38;5;241m=\u001b[39mout, keepdims\u001b[38;5;241m=\u001b[39mkeepdims)\n\u001b[1;32m 615\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n",
|
||||
"\u001b[0;31mValueError\u001b[0m: All-NaN slice encountered"
|
||||
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)",
|
||||
"Cell \u001B[0;32mIn[9], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [autotuner\u001B[38;5;241m.\u001B[39mget_peak_performance(simulator) \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m 3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m 4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n",
|
||||
"Cell \u001B[0;32mIn[9], line 2\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m 1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [\u001B[43mautotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_peak_performance\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m 3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m 4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:132\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance\u001B[0;34m(self, simulator)\u001B[0m\n\u001B[1;32m 130\u001B[0m block_widths \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_widths\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[1;32m 131\u001B[0m block_heights \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_heights\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[0;32m--> 132\u001B[0m j, i \u001B[38;5;241m=\u001B[39m \u001B[43mfind_max_index\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 134\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key] \u001B[38;5;241m=\u001B[39m { \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_widths[i],\n\u001B[1;32m 135\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_heights[j],\n\u001B[1;32m 136\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m\"\u001B[39m: megacells[j, i] }\n\u001B[1;32m 137\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mReturning \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m as peak performance parameters\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key])\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:126\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance.<locals>.find_max_index\u001B[0;34m(megacells)\u001B[0m\n\u001B[1;32m 125\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfind_max_index\u001B[39m(megacells):\n\u001B[0;32m--> 126\u001B[0m max_index \u001B[38;5;241m=\u001B[39m \u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnanargmax\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 127\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m np\u001B[38;5;241m.\u001B[39munravel_index(max_index, megacells\u001B[38;5;241m.\u001B[39mshape)\n",
|
||||
"File \u001B[0;32m~/.conda/envs/ShallowWaterGPU/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:613\u001B[0m, in \u001B[0;36mnanargmax\u001B[0;34m(a, axis, out, keepdims)\u001B[0m\n\u001B[1;32m 611\u001B[0m mask \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mall(mask, axis\u001B[38;5;241m=\u001B[39maxis)\n\u001B[1;32m 612\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m np\u001B[38;5;241m.\u001B[39many(mask):\n\u001B[0;32m--> 613\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAll-NaN slice encountered\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 614\u001B[0m res \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39margmax(a, axis\u001B[38;5;241m=\u001B[39maxis, out\u001B[38;5;241m=\u001B[39mout, keepdims\u001B[38;5;241m=\u001B[39mkeepdims)\n\u001B[1;32m 615\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m res\n",
|
||||
"\u001B[0;31mValueError\u001B[0m: All-NaN slice encountered"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -49,9 +49,6 @@
|
||||
"import time\n",
|
||||
"import os\n",
|
||||
"import gc\n",
|
||||
"import datetime\n",
|
||||
"\n",
|
||||
"import pycuda.driver as cuda\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" from StringIO import StringIO\n",
|
||||
@ -59,7 +56,8 @@
|
||||
" from io import StringIO\n",
|
||||
"\n",
|
||||
"#Finally, import our simulator\n",
|
||||
"from GPUSimulators import Common, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, IPythonMagic"
|
||||
"from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF\n",
|
||||
"from GPUSimulators.common import common"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -42,15 +42,10 @@
|
||||
"from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
|
||||
"#import mpld3\n",
|
||||
"\n",
|
||||
"import subprocess\n",
|
||||
"import socket\n",
|
||||
"import time\n",
|
||||
"import os\n",
|
||||
"import gc\n",
|
||||
"import datetime\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"import pycuda.driver as cuda\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" from StringIO import StringIO\n",
|
||||
@ -65,7 +60,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Finally, import our simulator\n",
|
||||
"from GPUSimulators import Common, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, IPythonMagic\n",
|
||||
"from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF\n",
|
||||
"from GPUSimulators.common import common\n",
|
||||
"from GPUSimulators.helpers import InitialConditions"
|
||||
]
|
||||
},
|
||||
@ -250,8 +246,8 @@
|
||||
" sim.simulate(1.0, dt=dt)\n",
|
||||
" sim.check()\n",
|
||||
" \n",
|
||||
" nt = sim.simSteps()\n",
|
||||
" dt = sim.simTime() / nt\n",
|
||||
" nt = sim.sim_steps()\n",
|
||||
" dt = sim.sim_time() / nt\n",
|
||||
" h, hu, hv = sim.download()\n",
|
||||
" \n",
|
||||
" if (transpose):\n",
|
||||
|
@ -42,15 +42,10 @@
|
||||
"from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
|
||||
"#import mpld3\n",
|
||||
"\n",
|
||||
"import subprocess\n",
|
||||
"import socket\n",
|
||||
"import time\n",
|
||||
"import os\n",
|
||||
"import gc\n",
|
||||
"import datetime\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"import pycuda.driver as cuda\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" from StringIO import StringIO\n",
|
||||
@ -65,7 +60,8 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#Finally, import our simulator\n",
|
||||
"from GPUSimulators import Common, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, IPythonMagic\n",
|
||||
"from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF\n",
|
||||
"from GPUSimulators.common import common\n",
|
||||
"from GPUSimulators.helpers import InitialConditions"
|
||||
]
|
||||
},
|
||||
@ -250,8 +246,8 @@
|
||||
" sim.simulate(1.0, dt=dt)\n",
|
||||
" sim.check()\n",
|
||||
" \n",
|
||||
" nt = sim.simSteps()\n",
|
||||
" dt = sim.simTime() / nt\n",
|
||||
" nt = sim.sim_steps()\n",
|
||||
" dt = sim.sim_time() / nt\n",
|
||||
" h, hu, hv = sim.download()\n",
|
||||
" \n",
|
||||
" if (transpose):\n",
|
||||
|
141864
EulerTesting.ipynb
141864
EulerTesting.ipynb
File diff suppressed because one or more lines are too long
@ -29,15 +29,159 @@ from tqdm.auto import tqdm
|
||||
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from GPUSimulators import Common, Simulator
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators.common import common, Timer
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
|
||||
|
||||
def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
|
||||
"""
|
||||
Runs a benchmark, and returns the number of megacells achieved
|
||||
"""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Initialize simulator
|
||||
try:
|
||||
sim = simulator(**arguments)
|
||||
except:
|
||||
# An exception raised - not possible to continue
|
||||
logger.debug("Failed creating %s with arguments %s", simulator.__name__, str(arguments))
|
||||
# raise RuntimeError("Failed creating %s with arguments %s", simulator.__name__, str(arguments))
|
||||
return np.nan
|
||||
|
||||
# Create timer events
|
||||
start = cuda.Event()
|
||||
end = cuda.Event()
|
||||
|
||||
# Warmup
|
||||
for i in range(warmup_timesteps):
|
||||
sim.substep(sim.dt, i)
|
||||
|
||||
# Run simulation with timer
|
||||
start.record(sim.stream)
|
||||
for i in range(timesteps):
|
||||
sim.substep(sim.dt, i)
|
||||
end.record(sim.stream)
|
||||
|
||||
# Synchronize end event
|
||||
end.synchronize()
|
||||
|
||||
# Compute megacells
|
||||
gpu_elapsed = end.time_since(start) * 1.0e-3
|
||||
megacells = (sim.nx * sim.ny * timesteps / (1000 * 1000)) / gpu_elapsed
|
||||
|
||||
# Sanity check solution
|
||||
h, hu, hv = sim.download()
|
||||
sane = True
|
||||
sane = sane and sanity_check(0.3, 0.7)
|
||||
sane = sane and sanity_check(-0.2, 0.2)
|
||||
sane = sane and sanity_check(-0.2, 0.2)
|
||||
|
||||
if sane:
|
||||
logger.debug("%s [%d x %d] succeeded: %f megacells, gpu elapsed %f", simulator.__name__,
|
||||
arguments["block_width"], arguments["block_height"], megacells, gpu_elapsed)
|
||||
return megacells
|
||||
else:
|
||||
logger.debug("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"],
|
||||
arguments["block_height"], gpu_elapsed)
|
||||
# raise RuntimeError("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed)
|
||||
return np.nan
|
||||
|
||||
|
||||
def gen_test_data(nx, ny, g):
|
||||
"""
|
||||
Generates test dataset
|
||||
"""
|
||||
|
||||
width = 100.0
|
||||
height = 100.0
|
||||
dx = width / float(nx)
|
||||
dy = height / float(ny)
|
||||
|
||||
x_center = dx * nx / 2.0
|
||||
y_center = dy * ny / 2.0
|
||||
|
||||
# Create a gaussian "dam break" that will not form shocks
|
||||
size = width / 5.0
|
||||
dt = 10 ** 10
|
||||
|
||||
h = np.zeros((ny, nx), dtype=np.float32)
|
||||
hu = np.zeros((ny, nx), dtype=np.float32)
|
||||
hv = np.zeros((ny, nx), dtype=np.float32)
|
||||
|
||||
extent = 1.0 / np.sqrt(2.0)
|
||||
x = (dx * (np.arange(0, nx, dtype=np.float32) + 0.5) - x_center) / size
|
||||
y = (dy * (np.arange(0, ny, dtype=np.float32) + 0.5) - y_center) / size
|
||||
xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')
|
||||
r = np.minimum(1.0, np.sqrt(xv ** 2 + yv ** 2))
|
||||
xv = None
|
||||
yv = None
|
||||
gc.collect()
|
||||
|
||||
# Generate highres
|
||||
cos = np.cos(np.pi * r)
|
||||
h = 0.5 + 0.1 * 0.5 * (1.0 + cos)
|
||||
hu = 0.1 * 0.5 * (1.0 + cos)
|
||||
hv = hu.copy()
|
||||
|
||||
scale = 0.7
|
||||
max_h_estimate = 0.6
|
||||
max_u_estimate = 0.1 * np.sqrt(2.0)
|
||||
dx = width / nx
|
||||
dy = height / ny
|
||||
dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(g * max_h_estimate))
|
||||
|
||||
return h, hu, hv, dx, dy, dt
|
||||
|
||||
|
||||
def sanity_check(variable, bound_min, bound_max):
|
||||
"""
|
||||
Checks that a variable is "sane"
|
||||
"""
|
||||
|
||||
maxval = np.amax(variable)
|
||||
minval = np.amin(variable)
|
||||
if (np.isnan(maxval)
|
||||
or np.isnan(minval)
|
||||
or maxval > bound_max
|
||||
or minval < bound_min):
|
||||
return False
|
||||
else:
|
||||
return True
|
||||
|
||||
|
||||
def benchmark_single_simulator(simulator, arguments, block_widths, block_heights):
|
||||
"""
|
||||
Runs a set of benchmarks for a single simulator
|
||||
"""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
megacells = np.empty((len(block_heights), len(block_widths)))
|
||||
megacells.fill(np.nan)
|
||||
|
||||
logger.debug("Running %d benchmarks with %s", len(block_heights) * len(block_widths), simulator.__name__)
|
||||
|
||||
sim_arguments = arguments.copy()
|
||||
|
||||
with Timer(simulator.__name__) as t:
|
||||
for j, block_height in enumerate(tqdm(block_heights, desc='Autotuner Progress')):
|
||||
sim_arguments.update({'block_height': block_height})
|
||||
for i, block_width in enumerate(tqdm(block_widths, desc=f'Iteration {j} Progress', leave=False)):
|
||||
sim_arguments.update({'block_width': block_width})
|
||||
megacells[j, i] = run_benchmark(sim_arguments)
|
||||
|
||||
logger.debug("Completed %s in %f seconds", simulator.__name__, t.secs)
|
||||
|
||||
return megacells
|
||||
|
||||
|
||||
class Autotuner:
|
||||
def __init__(self,
|
||||
nx=2048, ny=2048,
|
||||
block_widths=range(8, 32, 1),
|
||||
block_heights=range(8, 32, 1)):
|
||||
def __init__(self,
|
||||
nx=2048, ny=2048,
|
||||
block_widths=range(8, 32, 1),
|
||||
block_heights=range(8, 32, 1)):
|
||||
logger = logging.getLogger(__name__)
|
||||
self.filename = "autotuning_data_" + gethostname() + ".npz"
|
||||
self.nx = nx
|
||||
@ -48,50 +192,51 @@ class Autotuner:
|
||||
|
||||
def benchmark(self, simulator, force=False):
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
#Run through simulators and benchmark
|
||||
|
||||
# Run through simulators and benchmark
|
||||
key = str(simulator.__name__)
|
||||
logger.info("Benchmarking %s to %s", key, self.filename)
|
||||
|
||||
#If this simulator has been benchmarked already, skip it
|
||||
if (force==False and os.path.isfile(self.filename)):
|
||||
|
||||
# If this simulator has been benchmarked already, skip it
|
||||
if force == False and os.path.isfile(self.filename):
|
||||
with np.load(self.filename) as data:
|
||||
if key in data["simulators"]:
|
||||
logger.info("%s already benchmarked - skipping", key)
|
||||
return
|
||||
|
||||
|
||||
# Set arguments to send to the simulators during construction
|
||||
context = CudaContext.CudaContext(autotuning=False)
|
||||
context = CudaContext(autotuning=False)
|
||||
g = 9.81
|
||||
h0, hu0, hv0, dx, dy, dt = Autotuner.gen_test_data(nx=self.nx, ny=self.ny, g=g)
|
||||
h0, hu0, hv0, dx, dy, dt = gen_test_data(ny=self.ny, g=g)
|
||||
arguments = {
|
||||
'context': context,
|
||||
'h0': h0, 'hu0': hu0, 'hv0': hv0,
|
||||
'nx': self.nx, 'ny': self.ny,
|
||||
'dx': dx, 'dy': dy, 'dt': 0.9*dt,
|
||||
'dx': dx, 'dy': dy, 'dt': 0.9 * dt,
|
||||
'g': g,
|
||||
'compile_opts': ['-Wno-deprecated-gpu-targets']
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
# Load existing data into memory
|
||||
benchmark_data = {
|
||||
"simulators": [],
|
||||
"simulators": [],
|
||||
}
|
||||
if (os.path.isfile(self.filename)):
|
||||
if os.path.isfile(self.filename):
|
||||
with np.load(self.filename) as data:
|
||||
for k, v in data.items():
|
||||
benchmark_data[k] = v
|
||||
|
||||
|
||||
# Run benchmark
|
||||
benchmark_data[key + "_megacells"] = Autotuner.benchmark_single_simulator(simulator, arguments, self.block_widths, self.block_heights)
|
||||
benchmark_data[key + "_megacells"] = benchmark_single_simulator(arguments, self.block_widths,
|
||||
self.block_heights)
|
||||
benchmark_data[key + "_block_widths"] = self.block_widths
|
||||
benchmark_data[key + "_block_heights"] = self.block_heights
|
||||
benchmark_data[key + "_arguments"] = str(arguments)
|
||||
|
||||
|
||||
existing_sims = benchmark_data["simulators"]
|
||||
if (isinstance(existing_sims, np.ndarray)):
|
||||
if isinstance(existing_sims, np.ndarray):
|
||||
existing_sims = existing_sims.tolist()
|
||||
if (key not in existing_sims):
|
||||
if key not in existing_sims:
|
||||
benchmark_data["simulators"] = existing_sims + [key]
|
||||
|
||||
# Save to file
|
||||
@ -104,178 +249,40 @@ class Autotuner:
|
||||
"""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
assert issubclass(simulator, Simulator.BaseSimulator)
|
||||
key = simulator.__name__
|
||||
|
||||
if (key in self.performance):
|
||||
|
||||
if key in self.performance:
|
||||
return self.performance[key]
|
||||
else:
|
||||
#Run simulation if required
|
||||
if (not os.path.isfile(self.filename)):
|
||||
# Run simulation if required
|
||||
if not os.path.isfile(self.filename):
|
||||
logger.debug("Could not get autotuned peak performance for %s: benchmarking", key)
|
||||
self.benchmark(simulator)
|
||||
|
||||
|
||||
with np.load(self.filename) as data:
|
||||
if key not in data['simulators']:
|
||||
logger.debug("Could not get autotuned peak performance for %s: benchmarking", key)
|
||||
data.close()
|
||||
self.benchmark(simulator)
|
||||
data = np.load(self.filename)
|
||||
|
||||
|
||||
def find_max_index(megacells):
|
||||
max_index = np.nanargmax(megacells)
|
||||
return np.unravel_index(max_index, megacells.shape)
|
||||
|
||||
|
||||
megacells = data[key + '_megacells']
|
||||
block_widths = data[key + '_block_widths']
|
||||
block_heights = data[key + '_block_heights']
|
||||
j, i = find_max_index(megacells)
|
||||
|
||||
self.performance[key] = { "block_width": block_widths[i],
|
||||
|
||||
self.performance[key] = {"block_width": block_widths[i],
|
||||
"block_height": block_heights[j],
|
||||
"megacells": megacells[j, i] }
|
||||
"megacells": megacells[j, i]}
|
||||
logger.debug("Returning %s as peak performance parameters", self.performance[key])
|
||||
return self.performance[key]
|
||||
|
||||
#This should never happen
|
||||
|
||||
# This should never happen
|
||||
raise "Something wrong: Could not get autotuning data!"
|
||||
return None
|
||||
|
||||
def benchmark_single_simulator(simulator, arguments, block_widths, block_heights):
|
||||
"""
|
||||
Runs a set of benchmarks for a single simulator
|
||||
"""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
megacells = np.empty((len(block_heights), len(block_widths)))
|
||||
megacells.fill(np.nan)
|
||||
|
||||
logger.debug("Running %d benchmarks with %s", len(block_heights)*len(block_widths), simulator.__name__)
|
||||
|
||||
sim_arguments = arguments.copy()
|
||||
|
||||
with Common.Timer(simulator.__name__) as t:
|
||||
for j, block_height in enumerate(tqdm(block_heights, desc='Autotuner Progress')):
|
||||
sim_arguments.update({'block_height': block_height})
|
||||
for i, block_width in enumerate(tqdm(block_widths, desc=f'Iteration {j} Progress', leave=False)):
|
||||
sim_arguments.update({'block_width': block_width})
|
||||
megacells[j, i] = Autotuner.run_benchmark(simulator, sim_arguments)
|
||||
|
||||
|
||||
logger.debug("Completed %s in %f seconds", simulator.__name__, t.secs)
|
||||
|
||||
return megacells
|
||||
|
||||
def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
|
||||
"""
|
||||
Runs a benchmark, and returns the number of megacells achieved
|
||||
"""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
#Initialize simulator
|
||||
try:
|
||||
sim = simulator(**arguments)
|
||||
except:
|
||||
#An exception raised - not possible to continue
|
||||
logger.debug("Failed creating %s with arguments %s", simulator.__name__, str(arguments))
|
||||
# raise RuntimeError("Failed creating %s with arguments %s", simulator.__name__, str(arguments))
|
||||
return np.nan
|
||||
|
||||
#Create timer events
|
||||
start = cuda.Event()
|
||||
end = cuda.Event()
|
||||
|
||||
#Warmup
|
||||
for i in range(warmup_timesteps):
|
||||
sim.substep(sim.dt, i)
|
||||
|
||||
#Run simulation with timer
|
||||
start.record(sim.stream)
|
||||
for i in range(timesteps):
|
||||
sim.substep(sim.dt, i)
|
||||
end.record(sim.stream)
|
||||
|
||||
#Synchronize end event
|
||||
end.synchronize()
|
||||
|
||||
#Compute megacells
|
||||
gpu_elapsed = end.time_since(start)*1.0e-3
|
||||
megacells = (sim.nx*sim.ny*timesteps / (1000*1000)) / gpu_elapsed
|
||||
|
||||
#Sanity check solution
|
||||
h, hu, hv = sim.download()
|
||||
sane = True
|
||||
sane = sane and Autotuner.sanity_check(h, 0.3, 0.7)
|
||||
sane = sane and Autotuner.sanity_check(hu, -0.2, 0.2)
|
||||
sane = sane and Autotuner.sanity_check(hv, -0.2, 0.2)
|
||||
|
||||
if (sane):
|
||||
logger.debug("%s [%d x %d] succeeded: %f megacells, gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], megacells, gpu_elapsed)
|
||||
return megacells
|
||||
else:
|
||||
logger.debug("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed)
|
||||
# raise RuntimeError("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed)
|
||||
return np.nan
|
||||
|
||||
def gen_test_data(nx, ny, g):
    """
    Generates a smooth "dam break" test dataset for benchmarking.

    Arguments:
        nx, ny: Number of cells in x and y direction
        g: Gravitational acceleration

    Returns:
        h, hu, hv: Water depth and momentum fields (ny x nx, float32)
        dx, dy: Cell sizes
        dt: A CFL-limited timestep estimate for these initial conditions
    """

    width = 100.0
    height = 100.0
    dx = width / float(nx)
    dy = height / float(ny)

    x_center = dx*nx/2.0
    y_center = dy*ny/2.0

    #Create a gaussian "dam break" that will not form shocks
    size = width / 5.0

    # Normalized, cell-centered distance from the domain center, clamped to 1
    x = (dx*(np.arange(0, nx, dtype=np.float32)+0.5) - x_center) / size
    y = (dy*(np.arange(0, ny, dtype=np.float32)+0.5) - y_center) / size
    xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')
    r = np.minimum(1.0, np.sqrt(xv**2 + yv**2))
    xv = None
    yv = None
    gc.collect()

    #Generate highres
    cos = np.cos(np.pi*r)
    h = 0.5 + 0.1*0.5*(1.0 + cos)
    hu = 0.1*0.5*(1.0 + cos)
    hv = hu.copy()

    #Estimate a stable timestep from the maximum expected wave speed
    scale = 0.7
    max_h_estimate = 0.6
    max_u_estimate = 0.1*np.sqrt(2.0)
    dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(g*max_h_estimate))

    return h, hu, hv, dx, dy, dt
||||
|
||||
def sanity_check(variable, bound_min, bound_max):
    """
    Checks that a variable is "sane": contains no NaNs and all values
    lie within [bound_min, bound_max]. Returns True when the data passes.
    """

    lo = np.amin(variable)
    hi = np.amax(variable)

    # NaN compares false to everything, so it must be tested explicitly
    if np.isnan(lo) or np.isnan(hi):
        return False

    return bound_min <= lo and hi <= bound_max
@ -1,758 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the different helper functions and
|
||||
classes
|
||||
|
||||
Copyright (C) 2018 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import time
|
||||
import signal
|
||||
import subprocess
|
||||
import tempfile
|
||||
import re
|
||||
import io
|
||||
import hashlib
|
||||
import logging
|
||||
import gc
|
||||
import netCDF4
|
||||
import json
|
||||
|
||||
import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
from pycuda.tools import PageLockedMemoryPool
|
||||
|
||||
|
||||
def safeCall(cmd):
    """
    Runs an external command and returns its stdout as a string.
    Logs and re-raises if the command exits with a nonzero status.
    """
    logger = logging.getLogger(__name__)

    # Run relative to this file so git commands hit the right repository
    run_opts = {
        'stderr': subprocess.STDOUT,
        'cwd': os.path.dirname(os.path.realpath(__file__)),
        'universal_newlines': True, #text=True in more recent python
        'shell': False,
    }
    if os.name == 'nt':
        run_opts['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP

    try:
        return subprocess.check_output(cmd, **run_opts)
    except subprocess.CalledProcessError as e:
        logger.error("Git failed, \nReturn code: " + str(e.returncode) + "\nOutput: " + e.output)
        raise e
||||
|
||||
|
||||
def getGitHash():
    """Returns the git commit hash (HEAD) of this repository."""
    cmd = ["git", "rev-parse", "HEAD"]
    return safeCall(cmd)
||||
|
||||
|
||||
def getGitStatus():
    """Returns the porcelain git status (tracked files only) of this repository."""
    cmd = ["git", "status", "--porcelain", "-uno"]
    return safeCall(cmd)
||||
|
||||
|
||||
def toJson(in_dict, compressed=True):
    """
    Creates a JSON string from a dictionary.

    numpy arrays are converted to (nested) lists, and any other value that
    is not JSON serializable is converted to its string representation
    (with a warning).

    Arguments:
        in_dict: Dictionary to serialize (not modified; a shallow copy is used)
        compressed: Unused; kept for backwards compatibility

    Returns:
        A JSON string representation of in_dict
    """

    logger = logging.getLogger(__name__)
    out_dict = in_dict.copy()
    for key in out_dict:
        if isinstance(out_dict[key], np.ndarray):
            out_dict[key] = out_dict[key].tolist()
        else:
            try:
                json.dumps(out_dict[key])
            # Narrowed from a bare 'except:' -- json.dumps raises TypeError for
            # unserializable objects and ValueError for e.g. circular references
            except (TypeError, ValueError):
                value = str(out_dict[key])
                logger.warning("JSON: Converting {:s} to string ({:s})".format(key, value))
                out_dict[key] = value
    return json.dumps(out_dict)
||||
|
||||
|
||||
def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names=[], dt=None):
    """
    Runs a simulation, and stores output in netcdf file. Stores the times given in
    save_times, and saves all of the variables in list save_var_names. Elements in
    save_var_names can be set to None if you do not want to save them

    Arguments:
        simulator: Simulator class to instantiate
        simulator_args: Keyword arguments for the simulator constructor; must
            contain 'nx', 'ny' and the fields 'rho', 'rho_u', 'rho_v', 'E'
        outfile: Name of the netCDF output file (made unique by DataDumper)
        save_times: Non-empty sequence of simulation times to save at
        save_var_names: Variable names to save; None entries are skipped.
            NOTE(review): mutable default argument - appears safe only because
            the list is rebound (not mutated in place) below; confirm.
        dt: Optional fixed timestep forwarded to sim.simulate()

    Returns:
        (filename, profiling_data_sim_runner, sim.profiling_data_mpi) normally,
        or only the filename if the simulator's sanity check fails mid-run.
    """

    # Wall-clock profiling buckets: simulator init, netCDF writes, stepping
    profiling_data_sim_runner = { 'start': {}, 'end': {} }
    profiling_data_sim_runner["start"]["t_sim_init"] = 0
    profiling_data_sim_runner["end"]["t_sim_init"] = 0
    profiling_data_sim_runner["start"]["t_nc_write"] = 0
    profiling_data_sim_runner["end"]["t_nc_write"] = 0
    profiling_data_sim_runner["start"]["t_full_step"] = 0
    profiling_data_sim_runner["end"]["t_full_step"] = 0

    profiling_data_sim_runner["start"]["t_sim_init"] = time.time()

    logger = logging.getLogger(__name__)

    assert len(save_times) > 0, "Need to specify which times to save"

    with Timer("construct") as t:
        sim = simulator(**simulator_args)
        logger.info("Constructed in " + str(t.secs) + " seconds")

    #Create netcdf file and simulate
    with DataDumper(outfile, mode='w', clobber=False) as outdata:

        #Create attributes (metadata)
        outdata.ncfile.created = time.ctime(time.time())
        outdata.ncfile.git_hash = getGitHash()
        outdata.ncfile.git_status = getGitStatus()
        outdata.ncfile.simulator = str(simulator)

        # do not write fields to attributes (they are to large)
        simulator_args_for_ncfile = simulator_args.copy()
        del simulator_args_for_ncfile["rho"]
        del simulator_args_for_ncfile["rho_u"]
        del simulator_args_for_ncfile["rho_v"]
        del simulator_args_for_ncfile["E"]
        outdata.ncfile.sim_args = toJson(simulator_args_for_ncfile)

        #Create dimensions
        outdata.ncfile.createDimension('time', len(save_times))
        outdata.ncfile.createDimension('x', simulator_args['nx'])
        outdata.ncfile.createDimension('y', simulator_args['ny'])

        #Create variables for dimensions
        ncvars = {}
        ncvars['time'] = outdata.ncfile.createVariable('time', np.dtype('float32').char, 'time')
        ncvars['x'] = outdata.ncfile.createVariable( 'x', np.dtype('float32').char, 'x')
        ncvars['y'] = outdata.ncfile.createVariable( 'y', np.dtype('float32').char, 'y')

        #Fill variables with proper values
        ncvars['time'][:] = save_times
        extent = sim.getExtent()
        ncvars['x'][:] = np.linspace(extent[0], extent[1], simulator_args['nx'])
        ncvars['y'][:] = np.linspace(extent[2], extent[3], simulator_args['ny'])

        #Choose which variables to download (prune None from list, but keep the index)
        download_vars = []
        for i, var_name in enumerate(save_var_names):
            if var_name is not None:
                download_vars += [i]
        save_var_names = list(save_var_names[i] for i in download_vars)

        #Create variables (lossy-compressed to 3 significant decimals)
        for var_name in save_var_names:
            ncvars[var_name] = outdata.ncfile.createVariable(var_name, np.dtype('float32').char, ('time', 'y', 'x'), zlib=True, least_significant_digit=3)

        #Create step sizes between each save
        t_steps = np.empty_like(save_times)
        t_steps[0] = save_times[0]
        t_steps[1:] = save_times[1:] - save_times[0:-1]

        profiling_data_sim_runner["end"]["t_sim_init"] = time.time()

        #Start simulation loop
        progress_printer = ProgressPrinter(save_times[-1], print_every=10)
        for k in range(len(save_times)):
            #Get target time and step size there
            t_step = t_steps[k]
            t_end = save_times[k]

            #Sanity check simulator; abort (returning the partial file) on failure
            try:
                sim.check()
            except AssertionError as e:
                logger.error("Error after {:d} steps (t={:f}: {:s}".format(sim.simSteps(), sim.simTime(), str(e)))
                return outdata.filename

            profiling_data_sim_runner["start"]["t_full_step"] += time.time()

            #Simulate
            if (t_step > 0.0):
                sim.simulate(t_step, dt)

            profiling_data_sim_runner["end"]["t_full_step"] += time.time()

            profiling_data_sim_runner["start"]["t_nc_write"] += time.time()

            #Download
            save_vars = sim.download(download_vars)

            #Save to file
            for i, var_name in enumerate(save_var_names):
                ncvars[var_name][k, :] = save_vars[i]

            profiling_data_sim_runner["end"]["t_nc_write"] += time.time()

            #Write progress to screen
            print_string = progress_printer.getPrintString(t_end)
            if (print_string):
                logger.debug(print_string)

        logger.debug("Simulated to t={:f} in {:d} timesteps (average dt={:f})".format(t_end, sim.simSteps(), sim.simTime() / sim.simSteps()))

    return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi
||||
|
||||
|
||||
class Timer(object):
    """
    Context manager that measures (and logs) the wall-clock time
    spent inside a with-block.
    """

    def __init__(self, tag, log_level=logging.DEBUG):
        self.logger = logging.getLogger(__name__)
        self.tag = tag
        self.log_level = log_level

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *args):
        self.end = time.time()
        self.secs = self.end - self.start
        self.msecs = 1000 * self.secs  # milliseconds
        self.logger.log(self.log_level, "%s: %f ms", self.tag, self.msecs)

    def elapsed(self):
        """Seconds elapsed since the with-block was entered (may be called mid-block)."""
        return time.time() - self.start
||||
|
||||
|
||||
class PopenFileBuffer(object):
    """
    Holds a pair of temporary files used to capture stdout/stderr
    of a subprocess.
    """

    def __init__(self):
        self.stdout = tempfile.TemporaryFile(mode='w+t')
        self.stderr = tempfile.TemporaryFile(mode='w+t')

    def __del__(self):
        self.stdout.close()
        self.stderr.close()

    def read(self):
        """Returns (stdout, stderr) captured so far; both files remain appendable."""
        cout = self._drain(self.stdout)
        cerr = self._drain(self.stderr)
        return cout, cerr

    @staticmethod
    def _drain(f):
        # Rewind, read everything, then seek back to the end so that
        # the subprocess can keep appending to the file
        f.seek(0)
        data = f.read()
        f.seek(0, 2)
        return data
||||
|
||||
|
||||
class IPEngine(object):
    """
    Class for starting IPEngines for MPI processing in IPython

    Starts one ipcontroller process plus n_engines MPI-backed ipengine
    processes, then attaches an ipyparallel.Client to the running cluster.
    Call shutdown() (or rely on __del__) to terminate the processes.
    """

    def __init__(self, n_engines):
        """
        Arguments:
            n_engines: Number of MPI engine processes to start
        """
        self.logger = logging.getLogger(__name__)

        #Start ipcontroller
        self.logger.info("Starting IPController")
        self.c_buff = PopenFileBuffer()
        c_cmd = ["ipcontroller", "--ip='*'"]
        c_params = dict()
        c_params['stderr'] = self.c_buff.stderr
        c_params['stdout'] = self.c_buff.stdout
        c_params['shell'] = False
        if os.name == 'nt':
            # New process group so CTRL_C_EVENT in shutdown() reaches only this child
            c_params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
        self.c = subprocess.Popen(c_cmd, **c_params)

        #Wait until controller is running
        # NOTE(review): fixed sleep; assumes the controller is up within 3 s
        time.sleep(3)

        #Start engines
        self.logger.info("Starting IPEngines")
        self.e_buff = PopenFileBuffer()
        e_cmd = ["mpiexec", "-n", str(n_engines), "ipengine", "--mpi"]
        e_params = dict()
        e_params['stderr'] = self.e_buff.stderr
        e_params['stdout'] = self.e_buff.stdout
        e_params['shell'] = False
        if os.name == 'nt':
            e_params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
        self.e = subprocess.Popen(e_cmd, **e_params)

        # attach to a running cluster
        import ipyparallel
        self.cluster = ipyparallel.Client()#profile='mpi')
        time.sleep(3)
        #Poll until all engines have registered with the controller
        while(len(self.cluster.ids) != n_engines):
            time.sleep(0.5)
            self.logger.info("Waiting for cluster...")
            self.cluster = ipyparallel.Client()#profile='mpi')

        self.logger.info("Done")

    def __del__(self):
        self.shutdown()

    def shutdown(self):
        """
        Stops the engine and controller processes.

        Idempotent: each process handle is set to None once terminated,
        and None handles are skipped on subsequent calls.
        """
        if (self.e is not None):
            if (os.name == 'nt'):
                # On Windows, ask the engines to exit cleanly first
                self.logger.warn("Sending CTRL+C to IPEngine")
                self.e.send_signal(signal.CTRL_C_EVENT)

            try:
                self.e.communicate(timeout=3)
                self.e.kill()
            except subprocess.TimeoutExpired:
                # Did not exit within the timeout: force-kill and reap
                self.logger.warn("Killing IPEngine")
                self.e.kill()
                self.e.communicate()
            self.e = None

            cout, cerr = self.e_buff.read()
            self.logger.info("IPEngine cout: {:s}".format(cout))
            self.logger.info("IPEngine cerr: {:s}".format(cerr))
            self.e_buff = None

            gc.collect()

        if (self.c is not None):
            if (os.name == 'nt'):
                self.logger.warn("Sending CTRL+C to IPController")
                self.c.send_signal(signal.CTRL_C_EVENT)

            try:
                self.c.communicate(timeout=3)
                self.c.kill()
            except subprocess.TimeoutExpired:
                self.logger.warn("Killing IPController")
                self.c.kill()
                self.c.communicate()
            self.c = None

            cout, cerr = self.c_buff.read()
            self.logger.info("IPController cout: {:s}".format(cout))
            self.logger.info("IPController cerr: {:s}".format(cerr))
            self.c_buff = None

            gc.collect()
||||
|
||||
|
||||
class DataDumper(object):
    """
    Simple class for holding a netCDF4 object
    (handles opening and closing in a nice way)
    Use as
    with DataDumper("filename") as data:
        ...
    """

    def __init__(self, filename, *args, **kwargs):
        """
        Prepares (but does not open) the netCDF file.

        Arguments:
            filename: Target file path; when opened for writing a numbered
                suffix (_0000, _0001, ...) is appended if the file exists
            *args, **kwargs: Forwarded verbatim to netCDF4.Dataset in __enter__
        """
        self.logger = logging.getLogger(__name__)

        #Create directory if needed
        filename = os.path.abspath(filename)
        dirname = os.path.dirname(filename)
        if dirname and not os.path.isdir(dirname):
            self.logger.info("Creating directory " + dirname)
            os.makedirs(dirname)

        #Get mode of file if we have that
        mode = None
        if (args):
            mode = args[0]
        elif (kwargs and 'mode' in kwargs.keys()):
            mode = kwargs['mode']

        #Create new unique file if writing
        # (avoids clobbering existing output by appending a counter suffix)
        if (mode):
            if (("w" in mode) or ("+" in mode) or ("a" in mode)):
                i = 0
                stem, ext = os.path.splitext(filename)
                while (os.path.isfile(filename)):
                    filename = "{:s}_{:04d}{:s}".format(stem, i, ext)
                    i = i+1
        self.filename = os.path.abspath(filename)

        #Save arguments
        self.args = args
        self.kwargs = kwargs

        #Log output
        self.logger.info("Initialized " + self.filename)

    def __enter__(self):
        """Opens the netCDF dataset (available as self.ncfile) and returns self."""
        self.logger.info("Opening " + self.filename)
        if (self.args):
            self.logger.info("Arguments: " + str(self.args))
        if (self.kwargs):
            self.logger.info("Keyword arguments: " + str(self.kwargs))
        self.ncfile = netCDF4.Dataset(self.filename, *self.args, **self.kwargs)
        return self

    def __exit__(self, *args):
        """Closes the netCDF dataset."""
        self.logger.info("Closing " + self.filename)
        self.ncfile.close()
||||
|
||||
# NOTE(review): this appears to duplicate the module-level toJson() above
# (minus the logging and the 'compressed' parameter). The original
# indentation is ambiguous in this rendering - confirm whether it is meant
# as a DataDumper helper or is an accidental module-level redefinition.
def toJson(in_dict):
    """
    Creates a JSON string from a dictionary, converting numpy arrays to
    nested lists and falling back to str() for non-serializable values.
    """
    out_dict = in_dict.copy()

    for key in out_dict:
        if isinstance(out_dict[key], np.ndarray):
            out_dict[key] = out_dict[key].tolist()
        else:
            try:
                json.dumps(out_dict[key])
            except:
                # Not JSON serializable: store the string representation instead
                out_dict[key] = str(out_dict[key])

    return json.dumps(out_dict)
||||
|
||||
|
||||
class ProgressPrinter(object):
    """
    Small helper class for reporting simulation progress: a rate-limited
    progress bar with elapsed and estimated remaining time.
    """

    def __init__(self, total_steps, print_every=5):
        """
        Arguments:
            total_steps: Value of 'step' that corresponds to 100% progress
            print_every: Minimum number of seconds between progress reports
        """
        self.logger = logging.getLogger(__name__)
        self.start = time.time()
        self.total_steps = total_steps
        self.print_every = print_every
        self.next_print_time = self.print_every
        self.last_step = 0
        self.secs_per_iter = None

    def getPrintString(self, step):
        """
        Returns a progress string for the given step, or None when it is
        not yet time to print (rate limited by print_every) or when no
        progress has been made since the last report.
        """
        elapsed = time.time() - self.start
        if (elapsed > self.next_print_time):
            dt = elapsed - (self.next_print_time - self.print_every)
            dsteps = step - self.last_step
            steps_remaining = self.total_steps - step

            if (dsteps == 0):
                return

            self.last_step = step
            self.next_print_time = elapsed + self.print_every

            # Exponential moving average of the time per step
            if not self.secs_per_iter:
                self.secs_per_iter = dt / dsteps
            self.secs_per_iter = 0.2*self.secs_per_iter + 0.8*(dt / dsteps)

            remaining_time = steps_remaining * self.secs_per_iter

            return "{:s}. Total: {:s}, elapsed: {:s}, remaining: {:s}".format(
                ProgressPrinter.progressBar(step, self.total_steps),
                ProgressPrinter.timeString(elapsed + remaining_time),
                ProgressPrinter.timeString(elapsed),
                ProgressPrinter.timeString(remaining_time))

    # Fixed: these two helpers were defined without 'self' and without
    # @staticmethod, so calling them on an instance would fail.
    @staticmethod
    def timeString(seconds):
        """Formats a duration in seconds as e.g. '1h 2m 3s' (minimum '1s')."""
        seconds = int(max(seconds, 1))
        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        periods = [('h', hours), ('m', minutes), ('s', seconds)]
        time_string = ' '.join('{}{}'.format(value, name)
                               for name, value in periods
                               if value)
        return time_string

    @staticmethod
    def progressBar(step, total_steps, width=30):
        """Returns an ASCII progress bar, e.g. '0% [###====...] 100%'."""
        progress = np.round(width * step / total_steps).astype(np.int32)
        progressbar = "0% [" + "#"*(progress) + "="*(width-progress) + "] 100%"
        return progressbar
||||
|
||||
|
||||
class CudaArray2D:
    """
    Class that holds 2D data on the GPU (stored with halo/ghost cells).
    """

    def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32):
        """
        Uploads initial data to the CUDA device

        Arguments:
            stream: CUDA stream used for the (asynchronous) upload
            nx, ny: Domain size excluding halo
            x_halo, y_halo: Number of halo cells on each side in x/y
            cpu_data: Optional initial data; shape must be either (ny, nx)
                or the full halo'd shape (ny + 2*y_halo, nx + 2*x_halo)
            dtype: Element type; must be 4 bytes wide (e.g. float32)
        """

        self.logger = logging.getLogger(__name__)
        self.nx = nx
        self.ny = ny
        self.x_halo = x_halo
        self.y_halo = y_halo

        nx_halo = nx + 2*x_halo
        ny_halo = ny + 2*y_halo

        #self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
        #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
        self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype)

        #For returning to download
        self.memorypool = PageLockedMemoryPool()

        #If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        #Make sure data is in proper format
        assert cpu_data.shape == (ny_halo, nx_halo) or cpu_data.shape == (self.ny, self.nx), "Wrong shape of data %s vs %s / %s" % (str(cpu_data.shape), str((self.ny, self.nx)), str((ny_halo, nx_halo)))
        assert cpu_data.itemsize == 4, "Wrong size of data type"
        assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"

        #Create copy object from host to device
        # Center the (possibly smaller) cpu_data within the halo'd buffer
        x = (nx_halo - cpu_data.shape[1]) // 2
        y = (ny_halo - cpu_data.shape[0]) // 2
        self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]])
        #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)

    def __del__(self, *args):
        #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
        # Free GPU memory eagerly instead of waiting for garbage collection
        self.data.gpudata.free()
        self.data = None

    def download(self, stream, cpu_data=None, asynch=False, extent=None):
        """
        Enables downloading data from GPU to Python

        Arguments:
            stream: CUDA stream to enqueue the copy on
            cpu_data: Optional destination array; a page-locked array is
                allocated when None
            asynch: When False (default), blocks until the copy has finished
            extent: Optional [x, y, nx, ny] sub-region in the halo'd buffer;
                defaults to the interior (halo excluded)

        Returns:
            The destination numpy array
        """

        if (extent is None):
            x = self.x_halo
            y = self.y_halo
            nx = self.nx
            ny = self.ny
        else:
            x, y, nx, ny = extent

        if (cpu_data is None):
            #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
            #Allocate host memory
            #The following fails, don't know why (crashes python)
            cpu_data = cuda.pagelocked_empty((int(ny), int(nx)), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE)
            #Non-pagelocked: cpu_data = np.empty((ny, nx), dtype=np.float32)
            #cpu_data = self.memorypool.allocate((ny, nx), dtype=np.float32)

        assert nx == cpu_data.shape[1]
        assert ny == cpu_data.shape[0]
        assert x+nx <= self.nx + 2*self.x_halo
        assert y+ny <= self.ny + 2*self.y_halo

        #Create copy object from device to host
        copy = cuda.Memcpy2D()
        copy.set_src_device(self.data.gpudata)
        copy.set_dst_host(cpu_data)

        #Set offsets and pitch of source
        # strides[1] is the element size for a C-contiguous 2D array
        copy.src_x_in_bytes = int(x)*self.data.strides[1]
        copy.src_y = int(y)
        copy.src_pitch = self.data.strides[0]

        #Set width in bytes to copy for each row and
        #number of rows to copy
        copy.width_in_bytes = int(nx)*cpu_data.itemsize
        copy.height = int(ny)

        copy(stream)
        if asynch==False:
            stream.synchronize()

        return cpu_data

    def upload(self, stream, cpu_data, extent=None):
        """
        Uploads cpu_data to (a sub-region of) the GPU buffer.

        Arguments mirror download(); the copy is asynchronous on the given
        stream (no synchronization is performed here).
        """
        if (extent is None):
            x = self.x_halo
            y = self.y_halo
            nx = self.nx
            ny = self.ny
        else:
            x, y, nx, ny = extent

        assert(nx == cpu_data.shape[1])
        assert(ny == cpu_data.shape[0])
        assert(x+nx <= self.nx + 2*self.x_halo)
        assert(y+ny <= self.ny + 2*self.y_halo)

        #Create copy object from host to device
        copy = cuda.Memcpy2D()
        copy.set_dst_device(self.data.gpudata)
        copy.set_src_host(cpu_data)

        #Set offsets and pitch of destination
        copy.dst_x_in_bytes = int(x)*self.data.strides[1]
        copy.dst_y = int(y)
        copy.dst_pitch = self.data.strides[0]

        #Set width in bytes to copy for each row and
        #number of rows to copy
        copy.width_in_bytes = int(nx)*cpu_data.itemsize
        copy.height = int(ny)

        copy(stream)
||||
|
||||
|
||||
class CudaArray3D:
    """
    Class that holds 3D data on the GPU (stored with halo/ghost cells).
    """

    def __init__(self, stream, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None, dtype=np.float32):
        """
        Uploads initial data to the CUDA device

        Arguments:
            stream: CUDA stream used for the upload
            nx, ny, nz: Domain size excluding halo
            x_halo, y_halo, z_halo: Number of halo cells on each side in x/y/z
            cpu_data: Optional initial data; shape must be either (nz, ny, nx)
                or the full halo'd shape
            dtype: Element type; must be 4 bytes wide (e.g. float32)
        """

        self.logger = logging.getLogger(__name__)
        self.nx = nx
        self.ny = ny
        self.nz = nz
        self.x_halo = x_halo
        self.y_halo = y_halo
        self.z_halo = z_halo

        nx_halo = nx + 2*x_halo
        ny_halo = ny + 2*y_halo
        nz_halo = nz + 2*z_halo

        #self.logger.debug("Allocating [%dx%dx%d] buffer", self.nx, self.ny, self.nz)
        #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
        self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype)

        #For returning to download
        self.memorypool = PageLockedMemoryPool()

        #If we don't have any data, just allocate and return
        if cpu_data is None:
            return

        #Make sure data is in proper format
        assert cpu_data.shape == (nz_halo, ny_halo, nx_halo) or cpu_data.shape == (self.nz, self.ny, self.nx), "Wrong shape of data %s vs %s / %s" % (str(cpu_data.shape), str((self.nz, self.ny, self.nx)), str((nz_halo, ny_halo, nx_halo)))
        assert cpu_data.itemsize == 4, "Wrong size of data type"
        assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"

        #Create copy object from host to device
        copy = cuda.Memcpy3D()
        copy.set_src_host(cpu_data)
        copy.set_dst_device(self.data.gpudata)

        #Set offsets of destination
        # NOTE(review): for a 3D array, strides[1] is the row stride, not the
        # element size (which the 2D version uses via its strides[1]) - confirm
        # this byte offset is intended.
        x_offset = (nx_halo - cpu_data.shape[2]) // 2
        y_offset = (ny_halo - cpu_data.shape[1]) // 2
        z_offset = (nz_halo - cpu_data.shape[0]) // 2
        copy.dst_x_in_bytes = x_offset*self.data.strides[1]
        copy.dst_y = y_offset
        copy.dst_z = z_offset

        #Set pitch of destination
        # NOTE(review): strides[0] is the z-slice stride of a 3D array; a row
        # pitch would normally be strides[1] - confirm.
        copy.dst_pitch = self.data.strides[0]

        #Set width in bytes to copy for each row and
        #number of rows to copy
        width = max(self.nx, cpu_data.shape[2])
        height = max(self.ny, cpu_data.shape[1])
        # Fixed: was 'cpu-data.shape[0]' (parsed as a subtraction with the
        # undefined name 'cpu'), which raised NameError at runtime
        depth = max(self.nz, cpu_data.shape[0])
        copy.width_in_bytes = width*cpu_data.itemsize
        copy.height = height
        copy.depth = depth

        #Perform the copy
        copy(stream)

        #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)

    def __del__(self, *args):
        #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
        # Free GPU memory eagerly instead of waiting for garbage collection
        self.data.gpudata.free()
        self.data = None

    def download(self, stream, asynch=False):
        """
        Enables downloading data from GPU to Python

        Arguments:
            stream: CUDA stream to enqueue the copy on
            asynch: When False (default), blocks until the copy has finished

        Returns:
            A numpy array holding the interior (halo excluded) of the buffer
        """

        #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
        #Allocate host memory
        #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
        #cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32)
        cpu_data = self.memorypool.allocate((self.nz, self.ny, self.nx), dtype=np.float32)

        #Create copy object from device to host
        # NOTE(review): Memcpy2D has no src_z/depth attributes; a true 3D copy
        # presumably needs cuda.Memcpy3D here - confirm this path is exercised.
        copy = cuda.Memcpy2D()
        copy.set_src_device(self.data.gpudata)
        copy.set_dst_host(cpu_data)

        #Set offsets and pitch of source
        copy.src_x_in_bytes = self.x_halo*self.data.strides[1]
        copy.src_y = self.y_halo
        copy.src_z = self.z_halo
        copy.src_pitch = self.data.strides[0]

        #Set width in bytes to copy for each row and
        #number of rows to copy
        copy.width_in_bytes = self.nx*cpu_data.itemsize
        copy.height = self.ny
        copy.depth = self.nz

        copy(stream)
        if asynch==False:
            stream.synchronize()

        return cpu_data
||||
|
||||
|
||||
class ArakawaA2D:
    """
    A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
    """

    def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
        """
        Uploads initial data to the GPU device

        Arguments:
            stream: CUDA stream used for uploading
            nx, ny: Domain size (excluding halo)
            halo_x, halo_y: Halo size in x/y
            cpu_variables: List of numpy arrays (entries may be None to only
                allocate), one per grid variable
        """
        self.logger = logging.getLogger(__name__)
        self.gpu_variables = []
        for cpu_variable in cpu_variables:
            self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]

    def __getitem__(self, key):
        assert type(key) == int, "Indexing is int based"
        # Fixed off-by-one: use >= so that key == len(...) is rejected here
        # (previously it slipped past the check and the list raised instead)
        if (key >= len(self.gpu_variables) or key < 0):
            raise IndexError("Out of bounds")
        return self.gpu_variables[key]

    def download(self, stream, variables=None):
        """
        Enables downloading data from the GPU device to Python

        Arguments:
            stream: CUDA stream to use
            variables: Optional list of variable indices to download
                (defaults to all variables)

        Returns:
            List of numpy arrays. Downloads are asynchronous, so the caller
            may need to synchronize the stream before reading them.
        """
        if variables is None:
            variables = range(len(self.gpu_variables))

        cpu_variables = []
        for i in variables:
            assert i < len(self.gpu_variables), "Variable {:d} is out of range".format(i)
            cpu_variables += [self.gpu_variables[i].download(stream, asynch=True)]

        #stream.synchronize()
        return cpu_variables

    def check(self):
        """
        Checks that data is still sane (no NaNs); raises AssertionError otherwise.
        """
        for i, gpu_variable in enumerate(self.gpu_variables):
            var_sum = pycuda.gpuarray.sum(gpu_variable.data).get()
            self.logger.debug("Data %d with size [%d x %d] has average %f", i, gpu_variable.nx, gpu_variable.ny, var_sum / (gpu_variable.nx * gpu_variable.ny))
            assert np.isnan(var_sum) == False, "Data contains NaN values!"
||||
|
@ -19,29 +19,29 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator, Common
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
|
||||
from pycuda import gpuarray
|
||||
|
||||
from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
|
||||
class EE2D_KP07_dimsplit (BaseSimulator):
|
||||
|
||||
class EE2D_KP07_dimsplit(BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
context,
|
||||
rho, rho_u, rho_v, E,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
gamma,
|
||||
theta=1.3,
|
||||
def __init__(self,
|
||||
context,
|
||||
rho, rho_u, rho_v, E,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
gamma,
|
||||
theta=1.3,
|
||||
cfl_scale=0.9,
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
block_width=16, block_height=8):
|
||||
"""
|
||||
Initialization routine
|
||||
@ -60,77 +60,76 @@ class EE2D_KP07_dimsplit (BaseSimulator):
|
||||
gamma: Gas constant
|
||||
p: pressure
|
||||
"""
|
||||
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
2,
|
||||
block_width, block_height)
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
2,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
self.gamma = np.float32(gamma)
|
||||
self.theta = np.float32(theta)
|
||||
self.theta = np.float32(theta)
|
||||
|
||||
#Get kernels
|
||||
module = context.get_module("cuda/EE2D_KP07_dimsplit.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"],
|
||||
},
|
||||
jit_compile_args={})
|
||||
# Get kernels
|
||||
module = context.get_module("cuda/EE2D_KP07_dimsplit.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"],
|
||||
},
|
||||
jit_compile_args={})
|
||||
self.kernel = module.get_function("KP07DimsplitKernel")
|
||||
self.kernel.prepare("iiffffffiiPiPiPiPiPiPiPiPiPiiii")
|
||||
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[rho, rho_u, rho_v, E])
|
||||
self.u1 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[None, None, None, None])
|
||||
|
||||
# Create data by uploading to the device
|
||||
self.u0 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[rho, rho_u, rho_v, E])
|
||||
self.u1 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[None, None, None, None])
|
||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||
dt_x = np.min(self.dx / (np.abs(rho_u/rho) + np.sqrt(gamma*rho)))
|
||||
dt_y = np.min(self.dy / (np.abs(rho_v/rho) + np.sqrt(gamma*rho)))
|
||||
dt_x = np.min(self.dx / (np.abs(rho_u / rho) + np.sqrt(gamma * rho)))
|
||||
dt_y = np.min(self.dy / (np.abs(rho_v / rho) + np.sqrt(gamma * rho)))
|
||||
self.dt = min(dt_x, dt_y)
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
def substep(self, dt, step_number, external=True, internal=True):
|
||||
self.substepDimsplit(0.5*dt, step_number, external, internal)
|
||||
|
||||
def substepDimsplit(self, dt, substep, external, internal):
|
||||
if external and internal:
|
||||
#print("COMPLETE DOMAIN (dt=" + str(dt) + ")")
|
||||
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
def substep(self, dt, step_number, external=True, internal=True):
|
||||
self.substep_dimsplit(0.5 * dt, step_number, external, internal)
|
||||
|
||||
def substep_dimsplit(self, dt, substep, external, internal):
|
||||
if external and internal:
|
||||
# print("COMPLETE DOMAIN (dt=" + str(dt) + ")")
|
||||
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
return
|
||||
|
||||
|
||||
if external and not internal:
|
||||
###################################
|
||||
# XXX: Corners are treated twice! #
|
||||
@ -141,136 +140,135 @@ class EE2D_KP07_dimsplit (BaseSimulator):
|
||||
# NORTH
|
||||
# (x0, y0) x (x1, y1)
|
||||
# (0, ny-y_halo) x (nx, ny)
|
||||
self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, self.ny - int(self.u0[0].y_halo),
|
||||
self.nx, self.ny)
|
||||
self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, self.ny - int(self.u0[0].y_halo),
|
||||
self.nx, self.ny)
|
||||
|
||||
# SOUTH
|
||||
# (x0, y0) x (x1, y1)
|
||||
# (0, 0) x (nx, y_halo)
|
||||
self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, int(self.u0[0].y_halo))
|
||||
|
||||
self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, int(self.u0[0].y_halo))
|
||||
|
||||
we_grid_size = (1, self.grid_size[1])
|
||||
|
||||
|
||||
# WEST
|
||||
# (x0, y0) x (x1, y1)
|
||||
# (0, 0) x (x_halo, ny)
|
||||
self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
int(self.u0[0].x_halo), self.ny)
|
||||
self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
int(self.u0[0].x_halo), self.ny)
|
||||
|
||||
# EAST
|
||||
# (x0, y0) x (x1, y1)
|
||||
# (nx-x_halo, 0) x (nx, ny)
|
||||
self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
self.nx - int(self.u0[0].x_halo), 0,
|
||||
self.nx, self.ny)
|
||||
self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
self.nx - int(self.u0[0].x_halo), 0,
|
||||
self.nx, self.ny)
|
||||
return
|
||||
|
||||
if internal and not external:
|
||||
|
||||
# INTERNAL DOMAIN
|
||||
# (x0, y0) x (x1, y1)
|
||||
# (x_halo, y_halo) x (nx - x_halo, ny - y_halo)
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.internal_stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
int(self.u0[0].x_halo), int(self.u0[0].y_halo),
|
||||
self.nx - int(self.u0[0].x_halo), self.ny - int(self.u0[0].y_halo))
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.internal_stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.gamma,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u0[3].data.gpudata, self.u0[3].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.u1[3].data.gpudata, self.u1[3].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
int(self.u0[0].x_halo), int(self.u0[0].y_halo),
|
||||
self.nx - int(self.u0[0].x_halo), self.ny - int(self.u0[0].y_halo))
|
||||
return
|
||||
|
||||
def swapBuffers(self):
|
||||
def swap_buffers(self):
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
return
|
||||
|
||||
def getOutput(self):
|
||||
|
||||
def get_output(self):
|
||||
return self.u0
|
||||
|
||||
def check(self):
|
||||
self.u0.check()
|
||||
self.u1.check()
|
||||
return
|
||||
|
||||
def computeDt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
|
||||
return max_dt*0.5
|
||||
|
||||
def compute_dt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get()
|
||||
return max_dt * 0.5
|
||||
|
@ -20,30 +20,31 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator, Common
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
|
||||
from pycuda import gpuarray
|
||||
|
||||
from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
class FORCE (Simulator.BaseSimulator):
|
||||
|
||||
class FORCE(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
cfl_scale=0.9,
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
block_width=16, block_height=16,
|
||||
dt: float=None,
|
||||
compile_opts: list[str]=[]):
|
||||
dt: float = None,
|
||||
compile_opts: list[str] = []):
|
||||
"""
|
||||
Initialization routine
|
||||
|
||||
@ -59,76 +60,76 @@ class FORCE (Simulator.BaseSimulator):
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
compile_opts: Pass a list of nvcc compiler options
|
||||
"""
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
1,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
|
||||
#Get kernels
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
1,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
|
||||
# Get kernels
|
||||
module = context.get_module("cuda/SWE2D_FORCE.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
self.kernel = module.get_function("FORCEKernel")
|
||||
self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii")
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[None, None, None])
|
||||
|
||||
# Create data by uploading to the device
|
||||
self.u0 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[None, None, None])
|
||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||
|
||||
if dt == None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||
if dt is None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0)))
|
||||
self.dt = min(dt_x, dt_y)
|
||||
else:
|
||||
self.dt = dt
|
||||
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def getOutput(self):
|
||||
|
||||
def get_output(self):
|
||||
return self.u0
|
||||
|
||||
|
||||
def check(self):
|
||||
self.u0.check()
|
||||
self.u1.check()
|
||||
|
||||
def computeDt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
|
||||
return max_dt
|
||||
|
||||
def compute_dt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get()
|
||||
return max_dt
|
||||
|
@ -19,30 +19,31 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator, Common
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
|
||||
from pycuda import gpuarray
|
||||
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
class HLL (Simulator.BaseSimulator):
|
||||
|
||||
class HLL(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations using the Harten-Lax -van Leer approximate Riemann solver
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
cfl_scale=0.9,
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
block_width=16, block_height=16,
|
||||
dt: float=None,
|
||||
compile_opts: list[str]=[]):
|
||||
dt: float = None,
|
||||
compile_opts: list[str] = []):
|
||||
"""
|
||||
Initialization routine
|
||||
|
||||
@ -58,74 +59,74 @@ class HLL (Simulator.BaseSimulator):
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
compile_opts: Pass a list of nvcc compiler options
|
||||
"""
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
1,
|
||||
block_width, block_height);
|
||||
self.g = np.float32(g)
|
||||
|
||||
#Get kernels
|
||||
module = context.get_module("cuda/SWE2D_HLL.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
1,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
|
||||
# Get kernels
|
||||
module = context.get_module("cuda/SWE2D_HLL.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
self.kernel = module.get_function("HLLKernel")
|
||||
self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii")
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[None, None, None])
|
||||
|
||||
# Create data by uploading to the device
|
||||
self.u0 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[None, None, None])
|
||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||
if dt == None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||
if dt is None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0)))
|
||||
self.dt = min(dt_x, dt_y)
|
||||
else:
|
||||
self.dt = dt
|
||||
|
||||
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def getOutput(self):
|
||||
|
||||
def get_output(self):
|
||||
return self.u0
|
||||
|
||||
|
||||
def check(self):
|
||||
self.u0.check()
|
||||
self.u1.check()
|
||||
|
||||
def computeDt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
|
||||
return max_dt*0.5
|
||||
|
||||
def compute_dt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get()
|
||||
return max_dt * 0.5
|
||||
|
@ -19,31 +19,32 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator, Common
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
|
||||
from pycuda import gpuarray
|
||||
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
class HLL2 (Simulator.BaseSimulator):
|
||||
|
||||
class HLL2(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
theta=1.8,
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
theta=1.8,
|
||||
cfl_scale=0.9,
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
block_width=16, block_height=16,
|
||||
dt: float=None,
|
||||
compile_opts: list[str]=[]):
|
||||
dt: float = None,
|
||||
compile_opts: list[str] = []):
|
||||
"""
|
||||
Initialization routine
|
||||
|
||||
@ -59,81 +60,81 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
compile_opts: Pass a list of nvcc compiler options
|
||||
"""
|
||||
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
2,
|
||||
block_width, block_height);
|
||||
self.g = np.float32(g)
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
2,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
self.theta = np.float32(theta)
|
||||
|
||||
#Get kernels
|
||||
module = context.get_module("cuda/SWE2D_HLL2.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
|
||||
# Get kernels
|
||||
module = context.get_module("cuda/SWE2D_HLL2.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
self.kernel = module.get_function("HLL2Kernel")
|
||||
self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii")
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[None, None, None])
|
||||
|
||||
# Create data by uploading to the device
|
||||
self.u0 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[None, None, None])
|
||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||
|
||||
if dt == None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||
|
||||
if dt is None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0)))
|
||||
self.dt = min(dt_x, dt_y)
|
||||
else:
|
||||
self.dt = dt
|
||||
|
||||
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
self.substepDimsplit(dt*0.5, step_number)
|
||||
|
||||
def substepDimsplit(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.substep_dimsplit(dt * 0.5, step_number)
|
||||
|
||||
def substep_dimsplit(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def getOutput(self):
|
||||
|
||||
def get_output(self):
|
||||
return self.u0
|
||||
|
||||
|
||||
def check(self):
|
||||
self.u0.check()
|
||||
self.u1.check()
|
||||
|
||||
def computeDt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
|
||||
return max_dt*0.5
|
||||
|
||||
def compute_dt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get()
|
||||
return max_dt * 0.5
|
||||
|
@ -26,12 +26,12 @@ from IPython.core import magic_arguments
|
||||
from IPython.core.magic import line_magic, Magics, magics_class
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from GPUSimulators import Common
|
||||
from GPUSimulators.common import IPEngine
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
|
||||
|
||||
@magics_class
|
||||
class MagicCudaContext(Magics):
|
||||
class MagicCudaContext(Magics):
|
||||
@line_magic
|
||||
@magic_arguments.magic_arguments()
|
||||
@magic_arguments.argument(
|
||||
@ -44,14 +44,14 @@ class MagicCudaContext(Magics):
|
||||
'--no_autotuning', '-na', action="store_true", help='Disable autotuning of kernels')
|
||||
def cuda_context_handler(self, line):
|
||||
args = magic_arguments.parse_argstring(self.cuda_context_handler, line)
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
self.logger = logging.getLogger(__name__)
|
||||
|
||||
self.logger.info("Registering %s in user workspace", args.name)
|
||||
|
||||
|
||||
context_flags = None
|
||||
if (args.blocking):
|
||||
if args.blocking:
|
||||
context_flags = cuda.ctx_flags.SCHED_BLOCKING_SYNC
|
||||
|
||||
|
||||
if args.name in self.shell.user_ns.keys():
|
||||
self.logger.debug("Context already registered! Ignoring")
|
||||
return
|
||||
@ -59,12 +59,13 @@ class MagicCudaContext(Magics):
|
||||
self.logger.debug("Creating context")
|
||||
use_cache = False if args.no_cache else True
|
||||
use_autotuning = False if args.no_autotuning else True
|
||||
self.shell.user_ns[args.name] = CudaContext.CudaContext(context_flags=context_flags, use_cache=use_cache, autotuning=use_autotuning)
|
||||
|
||||
self.shell.user_ns[args.name] = CudaContext(context_flags=context_flags, use_cache=use_cache,
|
||||
autotuning=use_autotuning)
|
||||
|
||||
# this function will be called on exceptions in any cell
|
||||
def custom_exc(shell, etype, evalue, tb, tb_offset=None):
|
||||
self.logger.exception("Exception caught: Resetting to CUDA context %s", args.name)
|
||||
while (cuda.Context.get_current() != None):
|
||||
while cuda.Context.get_current() is not None:
|
||||
context = cuda.Context.get_current()
|
||||
self.logger.info("Popping <%s>", str(context.handle))
|
||||
cuda.Context.pop()
|
||||
@ -77,36 +78,30 @@ class MagicCudaContext(Magics):
|
||||
self.logger.error("CUDA will not work now")
|
||||
|
||||
self.logger.debug("==================================================================")
|
||||
|
||||
|
||||
# still show the error within the notebook, don't just swallow it
|
||||
shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset)
|
||||
|
||||
# this registers a custom exception handler for the whole current notebook
|
||||
get_ipython().set_custom_exc((Exception,), custom_exc)
|
||||
|
||||
|
||||
|
||||
# Handle CUDA context when exiting python
|
||||
import atexit
|
||||
def exitfunc():
|
||||
self.logger.info("Exitfunc: Resetting CUDA context stack")
|
||||
while (cuda.Context.get_current() != None):
|
||||
while cuda.Context.get_current() != None:
|
||||
context = cuda.Context.get_current()
|
||||
self.logger.info("`-> Popping <%s>", str(context.handle))
|
||||
cuda.Context.pop()
|
||||
self.logger.debug("==================================================================")
|
||||
|
||||
atexit.register(exitfunc)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@magics_class
|
||||
class MagicLogger(Magics):
|
||||
class MagicLogger(Magics):
|
||||
logger_initialized = False
|
||||
|
||||
|
||||
@line_magic
|
||||
@magic_arguments.magic_arguments()
|
||||
@magic_arguments.argument(
|
||||
@ -118,51 +113,47 @@ class MagicLogger(Magics):
|
||||
@magic_arguments.argument(
|
||||
'--file_level', '-f', type=int, default=10, help='The level of logging to file [0, 50]')
|
||||
def setup_logging(self, line):
|
||||
if (self.logger_initialized):
|
||||
if self.logger_initialized:
|
||||
logging.getLogger('GPUSimulators').info("Global logger already initialized!")
|
||||
return;
|
||||
return
|
||||
else:
|
||||
self.logger_initialized = True
|
||||
|
||||
|
||||
args = magic_arguments.parse_argstring(self.setup_logging, line)
|
||||
import sys
|
||||
|
||||
#Get root logger
|
||||
|
||||
# Get root logger
|
||||
logger = logging.getLogger('GPUSimulators')
|
||||
logger.setLevel(min(args.level, args.file_level))
|
||||
|
||||
#Add log to screen
|
||||
# Add log to screen
|
||||
ch = logging.StreamHandler()
|
||||
ch.setLevel(args.level)
|
||||
logger.addHandler(ch)
|
||||
logger.log(args.level, "Console logger using level %s", logging.getLevelName(args.level))
|
||||
|
||||
#Get the outfilename (try to evaluate if Python expression...)
|
||||
|
||||
# Get the outfilename (try to evaluate if Python expression...)
|
||||
try:
|
||||
outfile = eval(args.out, self.shell.user_global_ns, self.shell.user_ns)
|
||||
except:
|
||||
outfile = args.out
|
||||
|
||||
#Add log to file
|
||||
|
||||
# Add log to file
|
||||
logger.log(args.level, "File logger using level %s to %s", logging.getLevelName(args.file_level), outfile)
|
||||
|
||||
|
||||
fh = logging.FileHandler(outfile)
|
||||
formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s: %(message)s')
|
||||
fh.setFormatter(formatter)
|
||||
fh.setLevel(args.file_level)
|
||||
logger.addHandler(fh)
|
||||
|
||||
|
||||
logger.info("Python version %s", sys.version)
|
||||
self.shell.user_ns[args.name] = logger
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@magics_class
|
||||
class MagicMPI(Magics):
|
||||
|
||||
class MagicMPI(Magics):
|
||||
|
||||
@line_magic
|
||||
@magic_arguments.magic_arguments()
|
||||
@magic_arguments.argument(
|
||||
@ -177,13 +168,7 @@ class MagicMPI(Magics):
|
||||
self.shell.user_ns[args.name].shutdown()
|
||||
self.shell.user_ns[args.name] = None
|
||||
gc.collect()
|
||||
self.shell.user_ns[args.name] = Common.IPEngine(args.num_engines)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
self.shell.user_ns[args.name] = IPEngine(args.num_engines)
|
||||
|
||||
|
||||
# Register
|
||||
@ -191,4 +176,3 @@ ip = get_ipython()
|
||||
ip.register_magics(MagicCudaContext)
|
||||
ip.register_magics(MagicLogger)
|
||||
ip.register_magics(MagicMPI)
|
||||
|
||||
|
@ -24,32 +24,33 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator, Common
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
|
||||
from pycuda import gpuarray
|
||||
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
class KP07 (Simulator.BaseSimulator):
|
||||
|
||||
class KP07(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
theta=1.3,
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
theta=1.3,
|
||||
cfl_scale=0.9,
|
||||
order=2,
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
block_width=16, block_height=16,
|
||||
dt: float=None,
|
||||
compile_opts: list[str]=[]):
|
||||
dt: float = None,
|
||||
compile_opts: list[str] = []):
|
||||
"""
|
||||
Initialization routine
|
||||
|
||||
@ -65,84 +66,82 @@ class KP07 (Simulator.BaseSimulator):
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
compile_opts: Pass a list of nvcc compiler options
|
||||
"""
|
||||
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
order,
|
||||
block_width, block_height);
|
||||
self.g = np.float32(g)
|
||||
self.theta = np.float32(theta)
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
order,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
self.theta = np.float32(theta)
|
||||
self.order = np.int32(order)
|
||||
|
||||
#Get kernels
|
||||
module = context.get_module("cuda/SWE2D_KP07.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
# Get kernels
|
||||
module = context.get_module("cuda/SWE2D_KP07.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
self.kernel = module.get_function("KP07Kernel")
|
||||
self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii")
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[None, None, None])
|
||||
|
||||
# Create data by uploading to the device
|
||||
self.u0 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[None, None, None])
|
||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||
|
||||
if dt == None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||
|
||||
if dt is None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0)))
|
||||
self.dt = min(dt_x, dt_y)
|
||||
else:
|
||||
self.dt = dt
|
||||
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
self.substepRK(dt, step_number)
|
||||
|
||||
|
||||
def substepRK(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.theta,
|
||||
Simulator.stepOrderToCodedInt(step=substep, order=self.order),
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
self.substep_rk(dt, step_number)
|
||||
|
||||
def substep_rk(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.theta,
|
||||
Simulator.step_order_to_coded_int(step=substep, order=self.order),
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def getOutput(self):
|
||||
def get_output(self):
|
||||
return self.u0
|
||||
|
||||
|
||||
def check(self):
|
||||
self.u0.check()
|
||||
self.u1.check()
|
||||
|
||||
def computeDt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
|
||||
return max_dt*0.5**(self.order-1)
|
||||
|
||||
def compute_dt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get()
|
||||
return max_dt * 0.5 ** (self.order - 1)
|
||||
|
@ -24,31 +24,32 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator, Common
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
|
||||
from pycuda import gpuarray
|
||||
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
|
||||
class KP07_dimsplit(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations using the dimentionally split KP07 scheme
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
theta=1.3,
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
theta=1.3,
|
||||
cfl_scale=0.9,
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
block_width=16, block_height=16,
|
||||
dt: float=None,
|
||||
compile_opts: list[str]=[]):
|
||||
dt: float = None,
|
||||
compile_opts: list[str] = []):
|
||||
"""
|
||||
Initialization routine
|
||||
|
||||
@ -64,83 +65,83 @@ class KP07_dimsplit(Simulator.BaseSimulator):
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
compile_opts: Pass a list of nvcc compiler options
|
||||
"""
|
||||
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
2,
|
||||
block_width, block_height)
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
2,
|
||||
block_width, block_height)
|
||||
self.gc_x = 2
|
||||
self.gc_y = 2
|
||||
self.g = np.float32(g)
|
||||
self.theta = np.float32(theta)
|
||||
|
||||
#Get kernels
|
||||
module = context.get_module("cuda/SWE2D_KP07_dimsplit.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
# Get kernels
|
||||
module = context.get_module("cuda/SWE2D_KP07_dimsplit.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
self.kernel = module.get_function("KP07DimsplitKernel")
|
||||
self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii")
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
self.gc_x, self.gc_y,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
self.gc_x, self.gc_y,
|
||||
[None, None, None])
|
||||
|
||||
# Create data by uploading to the device
|
||||
self.u0 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
self.gc_x, self.gc_y,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
self.gc_x, self.gc_y,
|
||||
[None, None, None])
|
||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||
|
||||
if dt == None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||
if dt is None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0)))
|
||||
self.dt = min(dt_x, dt_y)
|
||||
else:
|
||||
self.dt = dt
|
||||
|
||||
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
self.substepDimsplit(dt*0.5, step_number)
|
||||
|
||||
def substepDimsplit(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.substep_dimsplit(dt * 0.5, step_number)
|
||||
|
||||
def substep_dimsplit(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.theta,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def getOutput(self):
|
||||
def get_output(self):
|
||||
return self.u0
|
||||
|
||||
def check(self):
|
||||
self.u0.check()
|
||||
self.u1.check()
|
||||
|
||||
def computeDt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
|
||||
return max_dt*0.5
|
||||
def compute_dt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get()
|
||||
return max_dt * 0.5
|
||||
|
@ -20,16 +20,17 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator, Common
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
|
||||
from pycuda import gpuarray
|
||||
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
class LxF (Simulator.BaseSimulator):
|
||||
|
||||
class LxF(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations using the Lax Friedrichs scheme
|
||||
"""
|
||||
@ -40,11 +41,11 @@ class LxF (Simulator.BaseSimulator):
|
||||
nx: int, ny: int,
|
||||
dx: int, dy: int,
|
||||
g: float,
|
||||
cfl_scale: float=0.9,
|
||||
cfl_scale: float = 0.9,
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
block_width: int=16, block_height: int=16,
|
||||
dt: float=None,
|
||||
compile_opts: list[str]=[]):
|
||||
block_width: int = 16, block_height: int = 16,
|
||||
dt: float = None,
|
||||
compile_opts: list[str] = []):
|
||||
"""
|
||||
Initialization routine
|
||||
|
||||
@ -60,80 +61,80 @@ class LxF (Simulator.BaseSimulator):
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
compile_opts: Pass a list of nvcc compiler options
|
||||
"""
|
||||
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
1,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
1,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
|
||||
# Get kernels
|
||||
module = context.get_module("cuda/SWE2D_LxF.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
module = context.get_module("cuda/SWE2D_LxF.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
self.kernel = module.get_function("LxFKernel")
|
||||
self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii")
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[None, None, None])
|
||||
# Create data by uploading to thedevice
|
||||
self.u0 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
1, 1,
|
||||
[None, None, None])
|
||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||
|
||||
if dt == None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||
if dt is None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0)))
|
||||
self.dt = min(dt_x, dt_y)
|
||||
else:
|
||||
self.dt = dt
|
||||
|
||||
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
"""
|
||||
Args:
|
||||
dt: Size of each timestep (seconds)
|
||||
"""
|
||||
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def getOutput(self):
|
||||
|
||||
def get_output(self):
|
||||
return self.u0
|
||||
|
||||
def check(self):
|
||||
self.u0.check()
|
||||
self.u1.check()
|
||||
|
||||
def computeDt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
|
||||
return max_dt*0.5
|
||||
|
||||
def compute_dt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get()
|
||||
return max_dt * 0.5
|
||||
|
@ -222,7 +222,7 @@ class MPISimulator(Simulator.BaseSimulator):
|
||||
|
||||
autotuner = sim.context.autotuner
|
||||
sim.context.autotuner = None;
|
||||
boundary_conditions = sim.getBoundaryConditions()
|
||||
boundary_conditions = sim.get_boundary_conditions()
|
||||
super().__init__(sim.context,
|
||||
sim.nx, sim.ny,
|
||||
sim.dx, sim.dy,
|
||||
@ -263,14 +263,14 @@ class MPISimulator(Simulator.BaseSimulator):
|
||||
if (gj == grid.grid[1]-1 and boundary_conditions.north != Simulator.BoundaryCondition.Type.Periodic):
|
||||
self.north = None
|
||||
new_boundary_conditions.north = boundary_conditions.north;
|
||||
sim.setBoundaryConditions(new_boundary_conditions)
|
||||
sim.set_boundary_conditions(new_boundary_conditions)
|
||||
|
||||
#Get number of variables
|
||||
self.nvars = len(self.getOutput().gpu_variables)
|
||||
self.nvars = len(self.get_output().gpu_variables)
|
||||
|
||||
#Shorthands for computing extents and sizes
|
||||
gc_x = int(self.sim.getOutput()[0].x_halo)
|
||||
gc_y = int(self.sim.getOutput()[0].y_halo)
|
||||
gc_x = int(self.sim.get_output()[0].x_halo)
|
||||
gc_y = int(self.sim.get_output()[0].y_halo)
|
||||
nx = int(self.sim.nx)
|
||||
ny = int(self.sim.ny)
|
||||
|
||||
@ -322,7 +322,7 @@ class MPISimulator(Simulator.BaseSimulator):
|
||||
#nvtx.mark("substep full", color="blue")
|
||||
#self.sim.substep(dt, step_number, external=True, internal=True)
|
||||
|
||||
self.sim.swapBuffers()
|
||||
self.sim.swap_buffers()
|
||||
|
||||
self.profiling_data_mpi["end"]["t_mpi_step"] += time.time()
|
||||
|
||||
@ -336,8 +336,8 @@ class MPISimulator(Simulator.BaseSimulator):
|
||||
|
||||
self.profiling_data_mpi["n_time_steps"] += 1
|
||||
|
||||
def getOutput(self):
|
||||
return self.sim.getOutput()
|
||||
def get_output(self):
|
||||
return self.sim.get_output()
|
||||
|
||||
def synchronize(self):
|
||||
self.sim.synchronize()
|
||||
@ -345,14 +345,14 @@ class MPISimulator(Simulator.BaseSimulator):
|
||||
def check(self):
|
||||
return self.sim.check()
|
||||
|
||||
def computeDt(self):
|
||||
local_dt = np.array([np.float32(self.sim.computeDt())]);
|
||||
def compute_dt(self):
|
||||
local_dt = np.array([np.float32(self.sim.compute_dt())]);
|
||||
global_dt = np.empty(1, dtype=np.float32)
|
||||
self.grid.comm.Allreduce(local_dt, global_dt, op=MPI.MIN)
|
||||
self.logger.debug("Local dt: {:f}, global dt: {:f}".format(local_dt[0], global_dt[0]))
|
||||
return global_dt[0]
|
||||
|
||||
def getExtent(self):
|
||||
def get_extent(self):
|
||||
"""
|
||||
Function which returns the extent of node with rank
|
||||
rank in the grid
|
||||
|
@ -45,7 +45,7 @@ class SHMEMSimulator(Simulator.BaseSimulator):
|
||||
# This would also eliminate the need for all the array bookkeeping in this class.
|
||||
autotuner = sims[0].context.autotuner
|
||||
sims[0].context.autotuner = None
|
||||
boundary_conditions = sims[0].getBoundaryConditions()
|
||||
boundary_conditions = sims[0].get_boundary_conditions()
|
||||
super().__init__(sims[0].context,
|
||||
sims[0].nx, sims[0].ny,
|
||||
sims[0].dx, sims[0].dy,
|
||||
@ -108,14 +108,14 @@ class SHMEMSimulator(Simulator.BaseSimulator):
|
||||
if (gj == grid.grid[1]-1 and boundary_conditions.north != Simulator.BoundaryCondition.Type.Periodic):
|
||||
self.north = None
|
||||
new_boundary_conditions.north = boundary_conditions.north;
|
||||
sim.setBoundaryConditions(new_boundary_conditions)
|
||||
sim.set_boundary_conditions(new_boundary_conditions)
|
||||
|
||||
#Get number of variables
|
||||
self.nvars[i] = len(sim.getOutput().gpu_variables)
|
||||
self.nvars[i] = len(sim.get_output().gpu_variables)
|
||||
|
||||
#Shorthands for computing extents and sizes
|
||||
gc_x = int(sim.getOutput()[0].x_halo)
|
||||
gc_y = int(sim.getOutput()[0].y_halo)
|
||||
gc_x = int(sim.get_output()[0].x_halo)
|
||||
gc_y = int(sim.get_output()[0].y_halo)
|
||||
nx = int(sim.nx)
|
||||
ny = int(sim.ny)
|
||||
|
||||
@ -150,10 +150,10 @@ class SHMEMSimulator(Simulator.BaseSimulator):
|
||||
for i, sim in enumerate(self.sims):
|
||||
sim.substep(dt, step_number)
|
||||
|
||||
def getOutput(self):
|
||||
def get_output(self):
|
||||
# XXX: Does not return what we would expect.
|
||||
# Returns first subdomain, but we want the whole domain.
|
||||
return self.sims[0].getOutput()
|
||||
return self.sims[0].get_output()
|
||||
|
||||
def synchronize(self):
|
||||
for sim in self.sims:
|
||||
@ -164,14 +164,14 @@ class SHMEMSimulator(Simulator.BaseSimulator):
|
||||
# Checks only first subdomain, but we want to check the whole domain.
|
||||
return self.sims[0].check()
|
||||
|
||||
def computeDt(self):
|
||||
def compute_dt(self):
|
||||
global_dt = float("inf")
|
||||
|
||||
for sim in self.sims:
|
||||
sim.context.synchronize()
|
||||
|
||||
for sim in self.sims:
|
||||
local_dt = sim.computeDt()
|
||||
local_dt = sim.compute_dt()
|
||||
if local_dt < global_dt:
|
||||
global_dt = local_dt
|
||||
self.logger.debug("Local dt: {:f}".format(local_dt))
|
||||
@ -179,7 +179,7 @@ class SHMEMSimulator(Simulator.BaseSimulator):
|
||||
self.logger.debug("Global dt: {:f}".format(global_dt))
|
||||
return global_dt
|
||||
|
||||
def getExtent(self, index=0):
|
||||
def get_extent(self, index=0):
|
||||
"""
|
||||
Function which returns the extent of the subdomain with index
|
||||
index in the grid
|
||||
|
@ -62,8 +62,8 @@ class SHMEMGrid(object):
|
||||
|
||||
for i in range(self.ngpus):
|
||||
# XXX: disabled for testing on single-GPU system
|
||||
#self.cuda_contexts.append(CudaContext.CudaContext(device=i, autotuning=False))
|
||||
self.cuda_contexts.append(CudaContext.CudaContext(device=0, autotuning=False))
|
||||
#self.cuda_contexts.append(CudaContext(device=i, autotuning=False))
|
||||
self.cuda_contexts.append(CudaContext(device=0, autotuning=False))
|
||||
|
||||
def getCoordinate(self, index):
|
||||
i = (index % self.grid[0])
|
||||
@ -180,7 +180,7 @@ class SHMEMSimulatorGroup(object):
|
||||
|
||||
autotuner = sims[0].context.autotuner
|
||||
sims[0].context.autotuner = None
|
||||
boundary_conditions = sims[0].getBoundaryConditions()
|
||||
boundary_conditions = sims[0].get_boundary_conditions()
|
||||
super().__init__(sims[0].context,
|
||||
sims[0].nx, sims[0].ny,
|
||||
sims[0].dx, sims[0].dy,
|
||||
@ -243,14 +243,14 @@ class SHMEMSimulatorGroup(object):
|
||||
if (gj == grid.grid[1]-1 and boundary_conditions.north != Simulator.BoundaryCondition.Type.Periodic):
|
||||
self.north = None
|
||||
new_boundary_conditions.north = boundary_conditions.north;
|
||||
sim.setBoundaryConditions(new_boundary_conditions)
|
||||
sim.set_boundary_conditions(new_boundary_conditions)
|
||||
|
||||
#Get number of variables
|
||||
self.nvars[i] = len(sim.getOutput().gpu_variables)
|
||||
self.nvars[i] = len(sim.get_output().gpu_variables)
|
||||
|
||||
#Shorthands for computing extents and sizes
|
||||
gc_x = int(sim.getOutput()[0].x_halo)
|
||||
gc_y = int(sim.getOutput()[0].y_halo)
|
||||
gc_x = int(sim.get_output()[0].x_halo)
|
||||
gc_y = int(sim.get_output()[0].y_halo)
|
||||
nx = int(sim.nx)
|
||||
ny = int(sim.ny)
|
||||
|
||||
@ -287,7 +287,7 @@ class SHMEMSimulatorGroup(object):
|
||||
def getOutput(self):
|
||||
# XXX: Does not return what we would expect.
|
||||
# Returns first subdomain, but we want the whole domain.
|
||||
return self.sims[0].getOutput()
|
||||
return self.sims[0].get_output()
|
||||
|
||||
def synchronize(self):
|
||||
for sim in self.sims:
|
||||
@ -305,7 +305,7 @@ class SHMEMSimulatorGroup(object):
|
||||
sim.context.synchronize()
|
||||
|
||||
for sim in self.sims:
|
||||
local_dt = sim.computeDt()
|
||||
local_dt = sim.compute_dt()
|
||||
if local_dt < global_dt:
|
||||
global_dt = local_dt
|
||||
self.logger.debug("Local dt: {:f}".format(local_dt))
|
||||
|
@ -20,18 +20,38 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
import logging
|
||||
from enum import IntEnum
|
||||
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from GPUSimulators import Common
|
||||
from GPUSimulators.common import ProgressPrinter
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
|
||||
|
||||
class BoundaryCondition(object):
|
||||
def get_types(bc):
|
||||
types = {'north': BoundaryCondition.Type((bc >> 24) & 0x0000000F),
|
||||
'south': BoundaryCondition.Type((bc >> 16) & 0x0000000F),
|
||||
'east': BoundaryCondition.Type((bc >> 8) & 0x0000000F),
|
||||
'west': BoundaryCondition.Type((bc >> 0) & 0x0000000F)}
|
||||
return types
|
||||
|
||||
|
||||
def step_order_to_coded_int(step, order):
|
||||
"""
|
||||
Helper function which packs the step and order into a single integer
|
||||
"""
|
||||
|
||||
step_order = (step << 16) | (order & 0x0000ffff)
|
||||
# print("Step: {0:032b}".format(step))
|
||||
# print("Order: {0:032b}".format(order))
|
||||
# print("Mix: {0:032b}".format(step_order))
|
||||
return np.int32(step_order)
|
||||
|
||||
|
||||
class BoundaryCondition(object):
|
||||
"""
|
||||
Class for holding boundary conditions for global boundaries
|
||||
"""
|
||||
@ -47,12 +67,7 @@ class BoundaryCondition(object):
|
||||
Periodic = 2,
|
||||
Reflective = 3
|
||||
|
||||
def __init__(self, types={
|
||||
'north': Type.Reflective,
|
||||
'south': Type.Reflective,
|
||||
'east': Type.Reflective,
|
||||
'west': Type.Reflective
|
||||
}):
|
||||
def __init__(self, types: dict[str: Type.Reflective]):
|
||||
"""
|
||||
Constructor
|
||||
"""
|
||||
@ -61,17 +76,18 @@ class BoundaryCondition(object):
|
||||
self.south = types['south']
|
||||
self.east = types['east']
|
||||
self.west = types['west']
|
||||
|
||||
if (self.north == BoundaryCondition.Type.Neumann \
|
||||
or self.south == BoundaryCondition.Type.Neumann \
|
||||
or self.east == BoundaryCondition.Type.Neumann \
|
||||
or self.west == BoundaryCondition.Type.Neumann):
|
||||
raise(NotImplementedError("Neumann boundary condition not supported"))
|
||||
|
||||
def __str__(self):
|
||||
return '[north={:s}, south={:s}, east={:s}, west={:s}]'.format(str(self.north), str(self.south), str(self.east), str(self.west))
|
||||
|
||||
def asCodedInt(self):
|
||||
if (self.north == BoundaryCondition.Type.Neumann
|
||||
or self.south == BoundaryCondition.Type.Neumann
|
||||
or self.east == BoundaryCondition.Type.Neumann
|
||||
or self.west == BoundaryCondition.Type.Neumann):
|
||||
raise (NotImplementedError("Neumann boundary condition not supported"))
|
||||
|
||||
def __str__(self):
|
||||
return '[north={:s}, south={:s}, east={:s}, west={:s}]'.format(str(self.north), str(self.south), str(self.east),
|
||||
str(self.west))
|
||||
|
||||
def as_coded_int(self):
|
||||
"""
|
||||
Helper function which packs four boundary conditions into one integer
|
||||
"""
|
||||
@ -79,26 +95,18 @@ class BoundaryCondition(object):
|
||||
bc = 0
|
||||
bc = bc | (self.north & 0x0000000F) << 24
|
||||
bc = bc | (self.south & 0x0000000F) << 16
|
||||
bc = bc | (self.east & 0x0000000F) << 8
|
||||
bc = bc | (self.west & 0x0000000F) << 0
|
||||
|
||||
#for t in types:
|
||||
bc = bc | (self.east & 0x0000000F) << 8
|
||||
bc = bc | (self.west & 0x0000000F) << 0
|
||||
|
||||
# for t in types:
|
||||
# print("{0:s}, {1:d}, {1:032b}, {1:08b}".format(t, types[t]))
|
||||
#print("bc: {0:032b}".format(bc))
|
||||
|
||||
# print("bc: {0:032b}".format(bc))
|
||||
|
||||
return np.int32(bc)
|
||||
|
||||
def getTypes(bc):
|
||||
types = {}
|
||||
types['north'] = BoundaryCondition.Type((bc >> 24) & 0x0000000F)
|
||||
types['south'] = BoundaryCondition.Type((bc >> 16) & 0x0000000F)
|
||||
types['east'] = BoundaryCondition.Type((bc >> 8) & 0x0000000F)
|
||||
types['west'] = BoundaryCondition.Type((bc >> 0) & 0x0000000F)
|
||||
return types
|
||||
|
||||
|
||||
class BaseSimulator(object):
|
||||
|
||||
|
||||
def __init__(self,
|
||||
context: CudaContext,
|
||||
nx: int, ny: int,
|
||||
@ -125,40 +133,40 @@ class BaseSimulator(object):
|
||||
num_substeps: Number of substeps to perform for a full step
|
||||
"""
|
||||
|
||||
#Get logger
|
||||
# Get logger
|
||||
self.logger = logging.getLogger(__name__ + "." + self.__class__.__name__)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
#GPU kernel
|
||||
|
||||
# Save input parameters
|
||||
# Notice that we need to specify them in the correct dataformat for the
|
||||
# GPU kernel
|
||||
self.context = context
|
||||
self.nx = np.int32(nx)
|
||||
self.ny = np.int32(ny)
|
||||
self.dx = np.float32(dx)
|
||||
self.dy = np.float32(dy)
|
||||
self.setBoundaryConditions(boundary_conditions)
|
||||
self.set_boundary_conditions(boundary_conditions)
|
||||
self.cfl_scale = cfl_scale
|
||||
self.num_substeps = num_substeps
|
||||
|
||||
#Handle autotuning block size
|
||||
|
||||
# Handle autotuning block size
|
||||
if self.context.autotuner:
|
||||
peak_configuration = self.context.autotuner.get_peak_performance(self.__class__)
|
||||
block_width = int(peak_configuration["block_width"])
|
||||
block_height = int(peak_configuration["block_height"])
|
||||
self.logger.debug("Used autotuning to get block size [%d x %d]", block_width, block_height)
|
||||
|
||||
#Compute kernel launch parameters
|
||||
self.block_size = (block_width, block_height, 1)
|
||||
self.grid_size = (
|
||||
int(np.ceil(self.nx / float(self.block_size[0]))),
|
||||
int(np.ceil(self.ny / float(self.block_size[1])))
|
||||
)
|
||||
|
||||
#Create a CUDA stream
|
||||
|
||||
# Compute kernel launch parameters
|
||||
self.block_size = (block_width, block_height, 1)
|
||||
self.grid_size = (
|
||||
int(np.ceil(self.nx / float(self.block_size[0]))),
|
||||
int(np.ceil(self.ny / float(self.block_size[1])))
|
||||
)
|
||||
|
||||
# Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
self.internal_stream = cuda.Stream()
|
||||
|
||||
#Keep track of simulation time and number of timesteps
|
||||
|
||||
# Keep track of simulation time and number of timesteps
|
||||
self.t = 0.0
|
||||
self.nt = 0
|
||||
|
||||
@ -171,41 +179,41 @@ class BaseSimulator(object):
|
||||
Requires that the step() function is implemented in the subclasses
|
||||
"""
|
||||
|
||||
printer = Common.ProgressPrinter(t)
|
||||
|
||||
t_start = self.simTime()
|
||||
printer = ProgressPrinter(t)
|
||||
|
||||
t_start = self.sim_time()
|
||||
t_end = t_start + t
|
||||
|
||||
|
||||
update_dt = True
|
||||
if (dt is not None):
|
||||
if dt is not None:
|
||||
update_dt = False
|
||||
self.dt = dt
|
||||
|
||||
while(self.simTime() < t_end):
|
||||
|
||||
while self.sim_time() < t_end:
|
||||
# Update dt every 100 timesteps and cross your fingers it works
|
||||
# for the next 100
|
||||
if (update_dt and (self.simSteps() % 100 == 0)):
|
||||
self.dt = self.computeDt()*self.cfl_scale
|
||||
|
||||
if update_dt and (self.sim_steps() % 100 == 0):
|
||||
self.dt = self.compute_dt() * self.cfl_scale
|
||||
|
||||
# Compute timestep for "this" iteration (i.e., shorten last timestep)
|
||||
current_dt = np.float32(min(self.dt, t_end-self.simTime()))
|
||||
current_dt = np.float32(min(self.dt, t_end - self.sim_time()))
|
||||
|
||||
# Stop if end reached (should not happen)
|
||||
if (current_dt <= 0.0):
|
||||
self.logger.warning("Timestep size {:d} is less than or equal to zero!".format(self.simSteps()))
|
||||
if current_dt <= 0.0:
|
||||
self.logger.warning("Timestep size {:d} is less than or equal to zero!".format(self.sim_steps()))
|
||||
break
|
||||
|
||||
|
||||
# Step forward in time
|
||||
self.step(current_dt)
|
||||
|
||||
#Print info
|
||||
print_string = printer.getPrintString(self.simTime() - t_start)
|
||||
if (print_string):
|
||||
# Print info
|
||||
print_string = printer.get_print_string(self.sim_time() - t_start)
|
||||
if print_string:
|
||||
self.logger.info("%s: %s", self, print_string)
|
||||
try:
|
||||
self.check()
|
||||
except AssertionError as e:
|
||||
e.args += ("Step={:d}, time={:f}".format(self.simSteps(), self.simTime()),)
|
||||
e.args += ("Step={:d}, time={:f}".format(self.sim_steps(), self.sim_time()),)
|
||||
raise
|
||||
|
||||
def step(self, dt: int):
|
||||
@ -218,57 +226,45 @@ class BaseSimulator(object):
|
||||
|
||||
for i in range(self.num_substeps):
|
||||
self.substep(dt, i)
|
||||
|
||||
|
||||
self.t += dt
|
||||
self.nt += 1
|
||||
|
||||
def download(self, variables=None):
|
||||
return self.getOutput().download(self.stream, variables)
|
||||
|
||||
return self.get_output().download(self.stream, variables)
|
||||
|
||||
def synchronize(self):
|
||||
self.stream.synchronize()
|
||||
|
||||
def simTime(self):
|
||||
|
||||
def sim_time(self):
|
||||
return self.t
|
||||
|
||||
def simSteps(self):
|
||||
def sim_steps(self):
|
||||
return self.nt
|
||||
|
||||
def getExtent(self):
|
||||
return [0, 0, self.nx*self.dx, self.ny*self.dy]
|
||||
|
||||
def setBoundaryConditions(self, boundary_conditions):
|
||||
|
||||
def get_extent(self):
|
||||
return [0, 0, self.nx * self.dx, self.ny * self.dy]
|
||||
|
||||
def set_boundary_conditions(self, boundary_conditions):
|
||||
self.logger.debug("Boundary conditions set to {:s}".format(str(boundary_conditions)))
|
||||
self.boundary_conditions = boundary_conditions.asCodedInt()
|
||||
|
||||
def getBoundaryConditions(self):
|
||||
return BoundaryCondition(BoundaryCondition.getTypes(self.boundary_conditions))
|
||||
|
||||
self.boundary_conditions = boundary_conditions.as_coded_int()
|
||||
|
||||
def get_boundary_conditions(self):
|
||||
return BoundaryCondition(get_types())
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
"""
|
||||
Function which performs one single substep with stepsize dt
|
||||
"""
|
||||
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def getOutput(self):
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
raise (NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def get_output(self):
|
||||
raise (NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def check(self):
|
||||
self.logger.warning("check() is not implemented - please implement")
|
||||
#raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def computeDt(self):
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
# raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
|
||||
def stepOrderToCodedInt(step, order):
|
||||
"""
|
||||
Helper function which packs the step and order into a single integer
|
||||
"""
|
||||
|
||||
step_order = (step << 16) | (order & 0x0000ffff)
|
||||
#print("Step: {0:032b}".format(step))
|
||||
#print("Order: {0:032b}".format(order))
|
||||
#print("Mix: {0:032b}".format(step_order))
|
||||
return np.int32(step_order)
|
||||
def compute_dt(self):
|
||||
raise (NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
@ -20,30 +20,31 @@ You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator, Common
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
# Import packages we need
|
||||
import numpy as np
|
||||
|
||||
from pycuda import gpuarray
|
||||
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
class WAF (Simulator.BaseSimulator):
|
||||
|
||||
class WAF(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations using the Forward-Backward linear scheme
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
def __init__(self,
|
||||
context,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
h0, hu0, hv0,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
g,
|
||||
cfl_scale=0.9,
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
boundary_conditions=BoundaryCondition(),
|
||||
block_width=16, block_height=16,
|
||||
dt: float=None,
|
||||
compile_opts: list[str]=[]):
|
||||
dt: float = None,
|
||||
compile_opts: list[str] = []):
|
||||
"""
|
||||
Initialization routine
|
||||
|
||||
@ -59,79 +60,79 @@ class WAF (Simulator.BaseSimulator):
|
||||
g: Gravitational accelleration (9.81 m/s^2)
|
||||
compile_opts: Pass a list of nvcc compiler options
|
||||
"""
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
2,
|
||||
block_width, block_height);
|
||||
self.g = np.float32(g)
|
||||
|
||||
#Get kernels
|
||||
module = context.get_module("cuda/SWE2D_WAF.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
# Call super constructor
|
||||
super().__init__(context,
|
||||
nx, ny,
|
||||
dx, dy,
|
||||
boundary_conditions,
|
||||
cfl_scale,
|
||||
2,
|
||||
block_width, block_height)
|
||||
self.g = np.float32(g)
|
||||
|
||||
# Get kernels
|
||||
module = context.get_module("cuda/SWE2D_WAF.cu",
|
||||
defines={
|
||||
'BLOCK_WIDTH': self.block_size[0],
|
||||
'BLOCK_HEIGHT': self.block_size[1]
|
||||
},
|
||||
compile_args={
|
||||
'no_extern_c': True,
|
||||
'options': ["--use_fast_math"] + compile_opts,
|
||||
},
|
||||
jit_compile_args={})
|
||||
self.kernel = module.get_function("WAFKernel")
|
||||
self.kernel.prepare("iiffffiiPiPiPiPiPiPiPiiii")
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[None, None, None])
|
||||
|
||||
# Create data by uploading to the device
|
||||
self.u0 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = ArakawaA2D(self.stream,
|
||||
nx, ny,
|
||||
2, 2,
|
||||
[None, None, None])
|
||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||
|
||||
if dt == None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||
|
||||
if dt is None:
|
||||
dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0)))
|
||||
dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0)))
|
||||
self.dt = min(dt_x, dt_y)
|
||||
else:
|
||||
self.dt = dt
|
||||
|
||||
|
||||
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||
|
||||
|
||||
def substep(self, dt, step_number):
|
||||
self.substepDimsplit(dt*0.5, step_number)
|
||||
|
||||
def substepDimsplit(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.substep_dimsplit(dt * 0.5, step_number)
|
||||
|
||||
def substep_dimsplit(self, dt, substep):
|
||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||
self.nx, self.ny,
|
||||
self.dx, self.dy, dt,
|
||||
self.g,
|
||||
substep,
|
||||
self.boundary_conditions,
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0],
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0],
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0],
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||
self.cfl_data.gpudata,
|
||||
0, 0,
|
||||
self.nx, self.ny)
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def getOutput(self):
|
||||
def get_output(self):
|
||||
return self.u0
|
||||
|
||||
|
||||
def check(self):
|
||||
self.u0.check()
|
||||
self.u1.check()
|
||||
|
||||
def computeDt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get();
|
||||
return max_dt*0.5
|
||||
|
||||
def compute_dt(self):
|
||||
max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get()
|
||||
return max_dt * 0.5
|
||||
|
9
GPUSimulators/common/__init__.py
Normal file
9
GPUSimulators/common/__init__.py
Normal file
@ -0,0 +1,9 @@
|
||||
from .arkawa_2d import ArakawaA2D
|
||||
from .common import *
|
||||
from .cuda_array_2d import CudaArray2D
|
||||
from .cuda_array_3d import CudaArray3D
|
||||
from .data_dumper import DataDumper
|
||||
from .ip_engine import IPEngine
|
||||
from .popen_file_buffer import PopenFileBuffer
|
||||
from .progress_printer import ProgressPrinter
|
||||
from .timer import Timer
|
57
GPUSimulators/common/arkawa_2d.py
Normal file
57
GPUSimulators/common/arkawa_2d.py
Normal file
@ -0,0 +1,57 @@
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
import pycuda.gpuarray
|
||||
|
||||
from GPUSimulators.common.cuda_array_2d import CudaArray2D
|
||||
|
||||
|
||||
class ArakawaA2D:
|
||||
"""
|
||||
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
||||
"""
|
||||
|
||||
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
|
||||
"""
|
||||
Uploads initial data to the GPU device
|
||||
"""
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.gpu_variables = []
|
||||
for cpu_variable in cpu_variables:
|
||||
self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]
|
||||
|
||||
def __getitem__(self, key):
|
||||
if type(key) != int:
|
||||
raise TypeError("Indexing is int based")
|
||||
|
||||
if key > len(self.gpu_variables) or key < 0:
|
||||
raise IndexError("Out of bounds")
|
||||
return self.gpu_variables[key]
|
||||
|
||||
def download(self, stream, variables=None):
|
||||
"""
|
||||
Enables downloading data from the GPU device to Python
|
||||
"""
|
||||
if variables is None:
|
||||
variables = range(len(self.gpu_variables))
|
||||
|
||||
cpu_variables = []
|
||||
for i in variables:
|
||||
if i >= len (self.gpu_variables):
|
||||
raise IndexError(f"Variable {i} is out of range")
|
||||
cpu_variables += [self.gpu_variables[i].download(stream, asynch=True)]
|
||||
|
||||
# stream.synchronize()
|
||||
return cpu_variables
|
||||
|
||||
def check(self):
|
||||
"""
|
||||
Checks that data is still sane
|
||||
"""
|
||||
for i, gpu_variable in enumerate(self.gpu_variables):
|
||||
var_sum = pycuda.gpuarray.sum(gpu_variable.data).get()
|
||||
self.logger.debug(f"Data {i} with size [{gpu_variable.nx} x {gpu_variable.ny}] "
|
||||
+ f"has average {var_sum / (gpu_variable.nx * gpu_variable.ny)}")
|
||||
|
||||
if np.isnan(var_sum):
|
||||
raise ValueError("Data contains NaN values!")
|
205
GPUSimulators/common/common.py
Normal file
205
GPUSimulators/common/common.py
Normal file
@ -0,0 +1,205 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
This python module implements the different helper functions and
|
||||
classes
|
||||
|
||||
Copyright (C) 2018 SINTEF ICT
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import time
|
||||
import subprocess
|
||||
import logging
|
||||
import json
|
||||
|
||||
from GPUSimulators.common.data_dumper import DataDumper
|
||||
from GPUSimulators.common.progress_printer import ProgressPrinter
|
||||
from GPUSimulators.common.timer import Timer
|
||||
|
||||
|
||||
def safe_call(cmd):
|
||||
logger = logging.getLogger(__name__)
|
||||
try:
|
||||
#git rev-parse HEAD
|
||||
current_dir = os.path.dirname(os.path.realpath(__file__))
|
||||
params = dict()
|
||||
params['stderr'] = subprocess.STDOUT
|
||||
params['cwd'] = current_dir
|
||||
params['universal_newlines'] = True #text=True in more recent python
|
||||
params['shell'] = False
|
||||
if os.name == 'nt':
|
||||
params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
|
||||
stdout = subprocess.check_output(cmd, **params)
|
||||
except subprocess.CalledProcessError as e:
|
||||
output = e.output
|
||||
logger.error("Git failed, \nReturn code: " + str(e.returncode) + "\nOutput: " + output)
|
||||
raise e
|
||||
|
||||
return stdout
|
||||
|
||||
|
||||
def get_git_hash():
|
||||
return safe_call(["git", "rev-parse", "HEAD"])
|
||||
|
||||
|
||||
def get_git_status():
|
||||
return safe_call(["git", "status", "--porcelain", "-uno"])
|
||||
|
||||
|
||||
def to_json(in_dict, compressed=True):
|
||||
"""
|
||||
Creates JSON string from a dictionary
|
||||
"""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
out_dict = in_dict.copy()
|
||||
for key in out_dict:
|
||||
if isinstance(out_dict[key], np.ndarray):
|
||||
out_dict[key] = out_dict[key].tolist()
|
||||
else:
|
||||
try:
|
||||
json.dumps(out_dict[key])
|
||||
except:
|
||||
value = str(out_dict[key])
|
||||
logger.warning("JSON: Converting {:s} to string ({:s})".format(key, value))
|
||||
out_dict[key] = value
|
||||
return json.dumps(out_dict)
|
||||
|
||||
|
||||
def run_simulation(simulator, simulator_args, outfile, save_times, save_var_names=[], dt=None):
|
||||
"""
|
||||
Runs a simulation, and store output in a netcdf file. Stores the times given in
|
||||
save_times, and saves all the variables in list save_var_names. Elements in
|
||||
save_var_names can be set to None if you do not want to save them
|
||||
"""
|
||||
|
||||
profiling_data_sim_runner = { 'start': {}, 'end': {} }
|
||||
profiling_data_sim_runner["start"]["t_sim_init"] = 0
|
||||
profiling_data_sim_runner["end"]["t_sim_init"] = 0
|
||||
profiling_data_sim_runner["start"]["t_nc_write"] = 0
|
||||
profiling_data_sim_runner["end"]["t_nc_write"] = 0
|
||||
profiling_data_sim_runner["start"]["t_full_step"] = 0
|
||||
profiling_data_sim_runner["end"]["t_full_step"] = 0
|
||||
|
||||
profiling_data_sim_runner["start"]["t_sim_init"] = time.time()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if len(save_times <= 0):
|
||||
raise ValueError("Need to specify which times to save")
|
||||
|
||||
with Timer("construct") as t:
|
||||
sim = simulator(**simulator_args)
|
||||
logger.info(f"Constructed in {str(t.secs)} seconds")
|
||||
|
||||
#Create a netcdf file and simulate
|
||||
with DataDumper(outfile, mode='w', clobber=False) as outdata:
|
||||
|
||||
#Create attributes (metadata)
|
||||
outdata.ncfile.created = time.ctime(time.time())
|
||||
outdata.ncfile.git_hash = get_git_hash()
|
||||
outdata.ncfile.git_status = get_git_status()
|
||||
outdata.ncfile.simulator = str(simulator)
|
||||
|
||||
# do not write fields to attributes (they are to large)
|
||||
simulator_args_for_ncfile = simulator_args.copy()
|
||||
del simulator_args_for_ncfile["rho"]
|
||||
del simulator_args_for_ncfile["rho_u"]
|
||||
del simulator_args_for_ncfile["rho_v"]
|
||||
del simulator_args_for_ncfile["E"]
|
||||
outdata.ncfile.sim_args = to_json(simulator_args_for_ncfile)
|
||||
|
||||
#Create dimensions
|
||||
outdata.ncfile.createDimension('time', len(save_times))
|
||||
outdata.ncfile.createDimension('x', simulator_args['nx'])
|
||||
outdata.ncfile.createDimension('y', simulator_args['ny'])
|
||||
|
||||
#Create variables for dimensions
|
||||
ncvars = {'time': outdata.ncfile.createVariable('time', np.dtype('float32').char, 'time'),
|
||||
'x': outdata.ncfile.createVariable('x', np.dtype('float32').char, 'x'),
|
||||
'y': outdata.ncfile.createVariable('y', np.dtype('float32').char, 'y')}
|
||||
|
||||
#Fill variables with proper values
|
||||
ncvars['time'][:] = save_times
|
||||
extent = sim.get_extent()
|
||||
ncvars['x'][:] = np.linspace(extent[0], extent[1], simulator_args['nx'])
|
||||
ncvars['y'][:] = np.linspace(extent[2], extent[3], simulator_args['ny'])
|
||||
|
||||
#Choose which variables to download (prune None from the list, but keep the index)
|
||||
download_vars = []
|
||||
for i, var_name in enumerate(save_var_names):
|
||||
if var_name is not None:
|
||||
download_vars += [i]
|
||||
save_var_names = list(save_var_names[i] for i in download_vars)
|
||||
|
||||
#Create variables
|
||||
for var_name in save_var_names:
|
||||
ncvars[var_name] = outdata.ncfile.createVariable(
|
||||
var_name, np.dtype('float32').char, ('time', 'y', 'x'), zlib=True, least_significant_digit=3)
|
||||
|
||||
#Create step sizes between each save
|
||||
t_steps = np.empty_like(save_times)
|
||||
t_steps[0] = save_times[0]
|
||||
t_steps[1:] = save_times[1:] - save_times[0:-1]
|
||||
|
||||
profiling_data_sim_runner["end"]["t_sim_init"] = time.time()
|
||||
|
||||
# Start simulation loop
|
||||
progress_printer = ProgressPrinter(save_times[-1], print_every=10)
|
||||
for k in range(len(save_times)):
|
||||
# Get target time and step size there
|
||||
t_step = t_steps[k]
|
||||
t_end = save_times[k]
|
||||
|
||||
# Sanity check simulator
|
||||
try:
|
||||
sim.check()
|
||||
except AssertionError as e:
|
||||
logger.error(f"Error after {sim.sim_steps()} steps (t={sim.sim_time()}: {str(e)}")
|
||||
return outdata.filename
|
||||
|
||||
profiling_data_sim_runner["start"]["t_full_step"] += time.time()
|
||||
|
||||
# Simulate
|
||||
if t_step > 0.0:
|
||||
sim.simulate(t_step, dt)
|
||||
|
||||
profiling_data_sim_runner["end"]["t_full_step"] += time.time()
|
||||
|
||||
profiling_data_sim_runner["start"]["t_nc_write"] += time.time()
|
||||
|
||||
#Download
|
||||
save_vars = sim.download(download_vars)
|
||||
|
||||
#Save to file
|
||||
for i, var_name in enumerate(save_var_names):
|
||||
ncvars[var_name][k, :] = save_vars[i]
|
||||
|
||||
profiling_data_sim_runner["end"]["t_nc_write"] += time.time()
|
||||
|
||||
#Write progress to screen
|
||||
print_string = progress_printer.get_print_string(t_end)
|
||||
if print_string:
|
||||
logger.debug(print_string)
|
||||
|
||||
logger.debug(f"Simulated to t={t_end} in "
|
||||
+ f"{sim.sim_steps()} timesteps (average dt={sim.sim_time() / sim.sim_steps()})")
|
||||
|
||||
return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi
|
||||
|
139
GPUSimulators/common/cuda_array_2d.py
Normal file
139
GPUSimulators/common/cuda_array_2d.py
Normal file
@ -0,0 +1,139 @@
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
from pycuda.tools import PageLockedMemoryPool
|
||||
|
||||
|
||||
class CudaArray2D:
|
||||
"""
|
||||
Class that holds 2D CUDA data
|
||||
"""
|
||||
|
||||
def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32):
|
||||
"""
|
||||
Uploads initial data to the CUDA device
|
||||
"""
|
||||
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.nx = nx
|
||||
self.ny = ny
|
||||
self.x_halo = x_halo
|
||||
self.y_halo = y_halo
|
||||
|
||||
nx_halo = nx + 2 * x_halo
|
||||
ny_halo = ny + 2 * y_halo
|
||||
|
||||
# self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
|
||||
# Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
||||
self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype)
|
||||
|
||||
# For returning to download
|
||||
self.memorypool = PageLockedMemoryPool()
|
||||
|
||||
# If we don't have any data, just allocate and return
|
||||
if cpu_data is None:
|
||||
return
|
||||
|
||||
# Make sure data is in proper format
|
||||
if cpu_data.shape != (ny_halo, nx_halo) and cpu_data.shape != (self.ny, self.nx):
|
||||
raise ValueError(
|
||||
f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.ny, self.nx))} / {str((ny_halo, nx_halo))}")
|
||||
|
||||
if cpu_data.itemsize != 4:
|
||||
raise ValueError("Wrong size of data type")
|
||||
|
||||
if np.isfortran(cpu_data):
|
||||
raise TypeError("Wrong datatype (Fortran, expected C)")
|
||||
|
||||
# Create a copy object from host to device
|
||||
x = (nx_halo - cpu_data.shape[1]) // 2
|
||||
y = (ny_halo - cpu_data.shape[0]) // 2
|
||||
self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]])
|
||||
# self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
|
||||
|
||||
def __del__(self, *args):
|
||||
# self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
|
||||
self.data.gpudata.free()
|
||||
self.data = None
|
||||
|
||||
def download(self, stream, cpu_data=None, asynch=False, extent=None):
|
||||
"""
|
||||
Enables downloading data from GPU to Python
|
||||
"""
|
||||
|
||||
if extent is None:
|
||||
x = self.x_halo
|
||||
y = self.y_halo
|
||||
nx = self.nx
|
||||
ny = self.ny
|
||||
else:
|
||||
x, y, nx, ny = extent
|
||||
|
||||
if cpu_data is None:
|
||||
# self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
|
||||
# Allocate host memory
|
||||
# The following fails, don't know why (crashes python)
|
||||
cpu_data = cuda.pagelocked_empty((int(ny), int(nx)), dtype=np.float32,
|
||||
mem_flags=cuda.host_alloc_flags.PORTABLE)
|
||||
# Non-pagelocked: cpu_data = np.empty((ny, nx), dtype=np.float32)
|
||||
# cpu_data = self.memorypool.allocate((ny, nx), dtype=np.float32)
|
||||
|
||||
assert nx == cpu_data.shape[1]
|
||||
assert ny == cpu_data.shape[0]
|
||||
assert x + nx <= self.nx + 2 * self.x_halo
|
||||
assert y + ny <= self.ny + 2 * self.y_halo
|
||||
|
||||
# Create a copy object from device to host
|
||||
copy = cuda.Memcpy2D()
|
||||
copy.set_src_device(self.data.gpudata)
|
||||
copy.set_dst_host(cpu_data)
|
||||
|
||||
# Set offsets and pitch of a source
|
||||
copy.src_x_in_bytes = int(x) * self.data.strides[1]
|
||||
copy.src_y = int(y)
|
||||
copy.src_pitch = self.data.strides[0]
|
||||
|
||||
# Set width in bytes to copy for each row and
|
||||
# number of rows to copy
|
||||
copy.width_in_bytes = int(nx) * cpu_data.itemsize
|
||||
copy.height = int(ny)
|
||||
|
||||
copy(stream)
|
||||
if not asynch:
|
||||
stream.synchronize()
|
||||
|
||||
return cpu_data
|
||||
|
||||
def upload(self, stream, cpu_data, extent=None):
|
||||
if extent is None:
|
||||
x = self.x_halo
|
||||
y = self.y_halo
|
||||
nx = self.nx
|
||||
ny = self.ny
|
||||
else:
|
||||
x, y, nx, ny = extent
|
||||
|
||||
assert (nx == cpu_data.shape[1])
|
||||
assert (ny == cpu_data.shape[0])
|
||||
assert (x + nx <= self.nx + 2 * self.x_halo)
|
||||
assert (y + ny <= self.ny + 2 * self.y_halo)
|
||||
|
||||
# Create a copy object from device to host
|
||||
copy = cuda.Memcpy2D()
|
||||
copy.set_dst_device(self.data.gpudata)
|
||||
copy.set_src_host(cpu_data)
|
||||
|
||||
# Set offsets and pitch of a source
|
||||
copy.dst_x_in_bytes = int(x) * self.data.strides[1]
|
||||
copy.dst_y = int(y)
|
||||
copy.dst_pitch = self.data.strides[0]
|
||||
|
||||
# Set width in bytes to copy for each row and
|
||||
# number of rows to copy
|
||||
copy.width_in_bytes = int(nx) * cpu_data.itemsize
|
||||
copy.height = int(ny)
|
||||
|
||||
copy(stream)
|
120
GPUSimulators/common/cuda_array_3d.py
Normal file
120
GPUSimulators/common/cuda_array_3d.py
Normal file
@ -0,0 +1,120 @@
|
||||
import logging
|
||||
|
||||
import numpy as np
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
from pycuda.tools import PageLockedMemoryPool
|
||||
|
||||
|
||||
class CudaArray3D:
|
||||
"""
|
||||
Class that holds 3D data
|
||||
"""
|
||||
|
||||
def __init__(self, stream, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None, dtype=np.float32):
|
||||
"""
|
||||
Uploads initial data to the CL device
|
||||
"""
|
||||
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.nx = nx
|
||||
self.ny = ny
|
||||
self.nz = nz
|
||||
self.x_halo = x_halo
|
||||
self.y_halo = y_halo
|
||||
self.z_halo = z_halo
|
||||
|
||||
nx_halo = nx + 2 * x_halo
|
||||
ny_halo = ny + 2 * y_halo
|
||||
nz_halo = nz + 2 * z_halo
|
||||
|
||||
# self.logger.debug("Allocating [%dx%dx%d] buffer", self.nx, self.ny, self.nz)
|
||||
# Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
||||
self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype)
|
||||
|
||||
# For returning to download
|
||||
self.memorypool = PageLockedMemoryPool()
|
||||
|
||||
# If we don't have any data, just allocate and return
|
||||
if cpu_data is None:
|
||||
return
|
||||
|
||||
# Make sure data is in proper format
|
||||
if (cpu_data.shape != (nz_halo, ny_halo, nx_halo)
|
||||
and cpu_data.shape != (self.nz, self.ny, self.nx)):
|
||||
raise ValueError(f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.nz, self.ny, self.nx))} / {str((nz_halo, ny_halo, nx_halo))}")
|
||||
|
||||
if cpu_data.itemsize != 4:
|
||||
raise ValueError("Wrong size of data type")
|
||||
|
||||
if np.isfortran(cpu_data):
|
||||
raise TypeError("Wrong datatype (Fortran, expected C)")
|
||||
|
||||
# Create a copy object from host to device
|
||||
copy = cuda.Memcpy3D()
|
||||
copy.set_src_host(cpu_data)
|
||||
copy.set_dst_device(self.data.gpudata)
|
||||
|
||||
# Set offsets of destination
|
||||
x_offset = (nx_halo - cpu_data.shape[2]) // 2
|
||||
y_offset = (ny_halo - cpu_data.shape[1]) // 2
|
||||
z_offset = (nz_halo - cpu_data.shape[0]) // 2
|
||||
copy.dst_x_in_bytes = x_offset * self.data.strides[1]
|
||||
copy.dst_y = y_offset
|
||||
copy.dst_z = z_offset
|
||||
|
||||
# Set pitch of destination
|
||||
copy.dst_pitch = self.data.strides[0]
|
||||
|
||||
# Set width in bytes to copy for each row and
|
||||
# number of rows to copy
|
||||
width = max(self.nx, cpu_data.shape[2])
|
||||
height = max(self.ny, cpu_data.shape[1])
|
||||
depth = max(self.nz, cpu - data.shape[0])
|
||||
copy.width_in_bytes = width * cpu_data.itemsize
|
||||
copy.height = height
|
||||
copy.depth = depth
|
||||
|
||||
# Perform the copy
|
||||
copy(stream)
|
||||
|
||||
# self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
|
||||
|
||||
def __del__(self, *args):
|
||||
# self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
|
||||
self.data.gpudata.free()
|
||||
self.data = None
|
||||
|
||||
def download(self, stream, asynch=False):
|
||||
"""
|
||||
Enables downloading data from GPU to Python
|
||||
"""
|
||||
|
||||
# self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
|
||||
# Allocate host memory
|
||||
# cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
|
||||
# cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32)
|
||||
cpu_data = self.memorypool.allocate((self.nz, self.ny, self.nx), dtype=np.float32)
|
||||
|
||||
# Create a copy object from device to host
|
||||
copy = cuda.Memcpy2D()
|
||||
copy.set_src_device(self.data.gpudata)
|
||||
copy.set_dst_host(cpu_data)
|
||||
|
||||
# Set offsets and pitch of a source
|
||||
copy.src_x_in_bytes = self.x_halo * self.data.strides[1]
|
||||
copy.src_y = self.y_halo
|
||||
copy.src_z = self.z_halo
|
||||
copy.src_pitch = self.data.strides[0]
|
||||
|
||||
# Set width in bytes to copy for each row and
|
||||
# number of rows to copy
|
||||
copy.width_in_bytes = self.nx * cpu_data.itemsize
|
||||
copy.height = self.ny
|
||||
copy.depth = self.nz
|
||||
|
||||
copy(stream)
|
||||
if not asynch:
|
||||
stream.synchronize()
|
||||
|
||||
return cpu_data
|
79
GPUSimulators/common/data_dumper.py
Normal file
79
GPUSimulators/common/data_dumper.py
Normal file
@ -0,0 +1,79 @@
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
||||
import netCDF4
|
||||
import numpy as np
|
||||
|
||||
|
||||
def to_json(in_dict):
    """
    Serialize a dictionary to a JSON string.

    numpy arrays are converted to (nested) lists; any other value the json
    module cannot encode is replaced by its string representation.

    Args:
        in_dict: Dictionary to serialize (a shallow copy is made; the
            input is not modified).

    Returns:
        A JSON string representing in_dict.
    """
    out_dict = in_dict.copy()

    for key in out_dict:
        if isinstance(out_dict[key], np.ndarray):
            out_dict[key] = out_dict[key].tolist()
        else:
            try:
                json.dumps(out_dict[key])
            # FIX: catch only the encoding errors json.dumps raises instead
            # of a bare except, which would also swallow KeyboardInterrupt
            # and SystemExit.
            except (TypeError, ValueError):
                out_dict[key] = str(out_dict[key])

    return json.dumps(out_dict)
|
||||
|
||||
|
||||
class DataDumper(object):
    """
    Simple class for holding a netCDF4 object
    (handles opening and closing nicely)
    Use as
    with DataDumper("filename") as data:
        ...
    """

    def __init__(self, filename, *args, **kwargs):
        self.logger = logging.getLogger(__name__)

        # Make sure the directory for the output file exists
        filename = os.path.abspath(filename)
        dirname = os.path.dirname(filename)
        if dirname and not os.path.isdir(dirname):
            self.logger.info("Creating directory " + dirname)
            os.makedirs(dirname)

        # Pick up the open-mode whether given positionally or by keyword
        mode = args[0] if args else kwargs.get('mode')

        # When writing/appending, find a filename that does not exist yet
        # so earlier output is never clobbered
        if mode and any(flag in mode for flag in ("w", "+", "a")):
            counter = 0
            stem, ext = os.path.splitext(filename)
            while os.path.isfile(filename):
                filename = f"{stem}_{str(counter).zfill(4)}{ext}"
                counter = counter + 1
        self.filename = os.path.abspath(filename)

        # Keep the arguments for when the file is actually opened
        self.args = args
        self.kwargs = kwargs

        self.logger.info("Initialized " + self.filename)

    def __enter__(self):
        # Open the netCDF file lazily, on context entry
        self.logger.info("Opening " + self.filename)
        if self.args:
            self.logger.info("Arguments: " + str(self.args))
        if self.kwargs:
            self.logger.info("Keyword arguments: " + str(self.kwargs))
        self.ncfile = netCDF4.Dataset(self.filename, *self.args, **self.kwargs)
        return self

    def __exit__(self, *args):
        self.logger.info("Closing " + self.filename)
        self.ncfile.close()
|
||||
|
101
GPUSimulators/common/ip_engine.py
Normal file
101
GPUSimulators/common/ip_engine.py
Normal file
@ -0,0 +1,101 @@
|
||||
import gc
|
||||
import logging
|
||||
import os
|
||||
import signal
|
||||
import subprocess
|
||||
import time
|
||||
|
||||
from GPUSimulators.common.popen_file_buffer import PopenFileBuffer
|
||||
|
||||
|
||||
class IPEngine(object):
    """
    Class for starting IPEngines for MPI processing in IPython
    """

    def __init__(self, n_engines):
        self.logger = logging.getLogger(__name__)

        # Launch the controller first and give it time to come up
        self.logger.info("Starting IPController")
        self.c_buff = PopenFileBuffer()
        self.c = self._spawn(["ipcontroller", "--ip='*'"], self.c_buff)
        time.sleep(3)

        # Then launch the MPI engines
        self.logger.info("Starting IPEngines")
        self.e_buff = PopenFileBuffer()
        self.e = self._spawn(["mpiexec", "-n", str(n_engines), "ipengine", "--mpi"],
                             self.e_buff)

        # Attach to the running cluster and poll until every engine has
        # registered with the controller
        import ipyparallel
        self.cluster = ipyparallel.Client()  # profile='mpi')
        time.sleep(3)
        while len(self.cluster.ids) != n_engines:
            time.sleep(0.5)
            self.logger.info("Waiting for cluster...")
            self.cluster = ipyparallel.Client()  # profile='mpi')

        self.logger.info("Done")

    @staticmethod
    def _spawn(cmd, buff):
        """Start a subprocess with stdout/stderr redirected into buff."""
        params = {
            'stderr': buff.stderr,
            'stdout': buff.stdout,
            'shell': False,
        }
        if os.name == 'nt':
            # New process group so CTRL_C_EVENT can be delivered on Windows
            params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP
        return subprocess.Popen(cmd, **params)

    def __del__(self):
        self.shutdown()

    def _terminate(self, proc, buff, name):
        """Stop one child process, log its captured output, and collect garbage."""
        if os.name == 'nt':
            self.logger.warning(f"Sending CTRL+C to {name}")
            proc.send_signal(signal.CTRL_C_EVENT)

        try:
            proc.communicate(timeout=3)
            proc.kill()
        except subprocess.TimeoutExpired:
            # Did not exit cleanly within the timeout: force-kill it
            self.logger.warning(f"Killing {name}")
            proc.kill()
            proc.communicate()

        cout, cerr = buff.read()
        self.logger.info(f"{name} cout: {cout}")
        self.logger.info(f"{name} cerr: {cerr}")

        gc.collect()

    def shutdown(self):
        """Shut down the engines first, then the controller."""
        if self.e is not None:
            self._terminate(self.e, self.e_buff, "IPEngine")
            self.e = None
            self.e_buff = None

        if self.c is not None:
            self._terminate(self.c, self.c_buff, "IPController")
            self.c = None
            self.c_buff = None
|
27
GPUSimulators/common/popen_file_buffer.py
Normal file
27
GPUSimulators/common/popen_file_buffer.py
Normal file
@ -0,0 +1,27 @@
|
||||
import tempfile
|
||||
|
||||
|
||||
class PopenFileBuffer(object):
    """
    Simple class for holding a set of temp files
    for communicating with a subprocess
    """

    def __init__(self):
        # Text-mode temp files handed to the subprocess as stdout/stderr
        self.stdout = tempfile.TemporaryFile(mode='w+t')
        self.stderr = tempfile.TemporaryFile(mode='w+t')

    def __del__(self):
        self.stdout.close()
        self.stderr.close()

    @staticmethod
    def _drain(stream):
        """Read everything written so far, then restore the append position."""
        stream.seek(0)
        content = stream.read()
        stream.seek(0, 2)
        return content

    def read(self):
        # Returns (stdout_contents, stderr_contents) accumulated so far
        return self._drain(self.stdout), self._drain(self.stderr)
|
62
GPUSimulators/common/progress_printer.py
Normal file
62
GPUSimulators/common/progress_printer.py
Normal file
@ -0,0 +1,62 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def time_string(seconds):
    """
    Format a duration as e.g. '1h 2m 3s', omitting zero-valued parts.
    Durations below one second are clamped to '1s'.
    """
    total = int(max(seconds, 1))
    hours = total // 3600
    minutes = (total // 60) % 60
    secs = total % 60

    parts = []
    for amount, suffix in ((hours, 'h'), (minutes, 'm'), (secs, 's')):
        if amount:
            parts.append('{}{}'.format(amount, suffix))
    return ' '.join(parts)
|
||||
|
||||
|
||||
def progress_bar(step, total_steps, width=30):
    """
    Render a textual progress bar of the given width, e.g.
    '0% [###===...===] 100%' for step/total_steps completed.
    """
    # np.round keeps the original round-half-to-even behavior
    filled = int(np.round(width * step / total_steps))
    return "0% [{}{}] 100%".format("#" * filled, "=" * (width - filled))
|
||||
|
||||
|
||||
class ProgressPrinter(object):
    """
    Small helper class for creating a progress bar
    """

    def __init__(self, total_steps, print_every=5):
        self.logger = logging.getLogger(__name__)
        self.start = time.time()              # wall-clock start time
        self.total_steps = total_steps
        self.print_every = print_every        # seconds between printouts
        self.next_print_time = self.print_every
        self.last_step = 0
        self.secs_per_iter = None             # exponential moving average

    def get_print_string(self, step):
        """
        Return a progress string when enough wall-clock time has passed
        since the previous printout, otherwise None.
        """
        elapsed = time.time() - self.start

        # Guard: not yet time to print
        if elapsed <= self.next_print_time:
            return None

        # Time and steps since the previous (scheduled) printout
        dt = elapsed - (self.next_print_time - self.print_every)
        dsteps = step - self.last_step
        steps_remaining = self.total_steps - step

        # Guard: no progress made, nothing sensible to report
        if dsteps == 0:
            return None

        self.last_step = step
        self.next_print_time = elapsed + self.print_every

        # Exponential moving average of seconds per step,
        # seeded with the first observed rate
        rate = dt / dsteps
        if not self.secs_per_iter:
            self.secs_per_iter = rate
        self.secs_per_iter = 0.2 * self.secs_per_iter + 0.8 * rate

        remaining_time = steps_remaining * self.secs_per_iter

        return (f"{progress_bar(step, self.total_steps)}. "
                + f"Total: {time_string(elapsed + remaining_time)}, "
                + f"elapsed: {time_string(elapsed)}, "
                + f"remaining: {time_string(remaining_time)}")
|
26
GPUSimulators/common/timer.py
Normal file
26
GPUSimulators/common/timer.py
Normal file
@ -0,0 +1,26 @@
|
||||
import logging
|
||||
import time
|
||||
|
||||
|
||||
class Timer(object):
    """
    Context manager that measures the wall-clock time spent in a section
    of code and logs it (in milliseconds) on exit.

    Use as:
        with Timer("my tag") as t:
            ...
    """

    def __init__(self, tag, log_level=logging.DEBUG):
        self.logger = logging.getLogger(__name__)
        self.tag = tag                  # label included in the log message
        self.log_level = log_level      # level the timing is logged at

    def __enter__(self):
        # Record entry time
        self.start = time.time()
        return self

    def __exit__(self, *args):
        # Record exit time and log the elapsed duration
        self.end = time.time()
        self.secs = self.end - self.start
        self.msecs = self.secs * 1000  # milliseconds
        self.logger.log(self.log_level, f"{self.tag}: {self.msecs} ms")

    def elapsed(self):
        """Seconds elapsed since the timer was entered."""
        return time.time() - self.start
|
@ -0,0 +1,2 @@
|
||||
from .cuda_context import CudaContext
|
||||
from .hip_context import HIPContext
|
@ -21,8 +21,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import time
|
||||
import re
|
||||
import io
|
||||
import hashlib
|
||||
@ -33,8 +31,8 @@ import pycuda.compiler as cuda_compiler
|
||||
import pycuda.gpuarray
|
||||
import pycuda.driver as cuda
|
||||
|
||||
from GPUSimulators import Autotuner, Common
|
||||
from GPUSimulators.gpu.Context import Context
|
||||
from GPUSimulators import Autotuner
|
||||
from GPUSimulators.common import common
|
||||
|
||||
|
||||
class CudaContext(object):
|
@ -3,10 +3,10 @@ import io
|
||||
import os.path
|
||||
|
||||
import hip as hip_main
|
||||
from hip import hip, hiprtc
|
||||
from hip import hip
|
||||
|
||||
from GPUSimulators import Common
|
||||
from GPUSimulators.gpu.Context import Context
|
||||
from GPUSimulators.common import common
|
||||
from GPUSimulators.gpu.context import Context
|
||||
|
||||
|
||||
class HIPContext(Context):
|
@ -52,9 +52,7 @@
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from GPUSimulators import IPythonMagic"
|
||||
]
|
||||
"source": ""
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@ -115,10 +113,10 @@
|
||||
"import numpy as np\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"from mpi4py import MPI\n",
|
||||
"import time\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"from GPUSimulators import IPythonMagic, MPISimulator, Common"
|
||||
"from GPUSimulators import MPISimulator\n",
|
||||
"from GPUSimulators.common import common"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -317,7 +315,6 @@
|
||||
"%%px\n",
|
||||
"\n",
|
||||
"from GPUSimulators.helpers import InitialConditions\n",
|
||||
"from GPUSimulators.Simulator import BoundaryCondition\n",
|
||||
"\n",
|
||||
"my_context.autotuner = None\n",
|
||||
"\n",
|
||||
@ -348,7 +345,7 @@
|
||||
" return sim\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names)"
|
||||
"outfile = Common.run_simulation(genSim, arguments, outfile, save_times, save_var_names)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -657,7 +654,7 @@
|
||||
" sim = MPISimulator.MPISimulator(local_sim, grid)\n",
|
||||
" return sim\n",
|
||||
"\n",
|
||||
"outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names)"
|
||||
"outfile = Common.run_simulation(genSim, arguments, outfile, save_times, save_var_names)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -13,19 +13,10 @@
|
||||
"%load_ext line_profiler\n",
|
||||
"\n",
|
||||
"#Import packages we need\n",
|
||||
"import numpy as np\n",
|
||||
"from matplotlib import animation, rc\n",
|
||||
"from matplotlib import pyplot as plt\n",
|
||||
"\n",
|
||||
"import subprocess\n",
|
||||
"import os\n",
|
||||
"import gc\n",
|
||||
"import datetime\n",
|
||||
"import importlib\n",
|
||||
"import logging\n",
|
||||
"\n",
|
||||
"import pycuda.driver as cuda\n",
|
||||
"import pycuda.compiler\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" from StringIO import StringIO\n",
|
||||
@ -37,7 +28,7 @@
|
||||
"rc('figure', figsize=(16.0, 12.0))\n",
|
||||
"rc('animation', html='html5')\n",
|
||||
"\n",
|
||||
"from GPUSimulators import Common, IPythonMagic\n",
|
||||
"from GPUSimulators.common import common\n",
|
||||
"from GPUSimulators.helpers import InitialConditions"
|
||||
]
|
||||
},
|
||||
@ -129,7 +120,7 @@
|
||||
" h = sim.u0[0].download(sim.stream)\n",
|
||||
" \n",
|
||||
" plt.figure()\n",
|
||||
" plt.title(str(sim) + \", t=\" + str(sim.simTime()) + \", nt=\" + str(sim.simSteps()))\n",
|
||||
" plt.title(str(sim) + \", t=\" + str(sim.sim_time()) + \", nt=\" + str(sim.sim_steps()))\n",
|
||||
" extent = [0, sim.dx*sim.nx, 0, sim.dy*sim.ny]\n",
|
||||
" plt.imshow(h, vmin=0.49, vmax=0.52, extent=extent)\n",
|
||||
" plt.colorbar()"
|
||||
@ -292,16 +283,16 @@
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[10], line 5\u001b[0m\n\u001b[1;32m 2\u001b[0m importlib\u001b[38;5;241m.\u001b[39mreload(KP07)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Common\u001b[38;5;241m.\u001b[39mTimer(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconstruct\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m t:\n\u001b[0;32m----> 5\u001b[0m sim \u001b[38;5;241m=\u001b[39m \u001b[43mKP07\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mKP07\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43marguments\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Common\u001b[38;5;241m.\u001b[39mTimer(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstep\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m t:\n\u001b[1;32m 8\u001b[0m t \u001b[38;5;241m=\u001b[39m sim\u001b[38;5;241m.\u001b[39msimulate(t_end)\n",
|
||||
"File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/KP07.py:70\u001b[0m, in \u001b[0;36mKP07.__init__\u001b[0;34m(self, context, h0, hu0, hv0, nx, ny, dx, dy, g, theta, cfl_scale, order, boundary_conditions, block_width, block_height, dt, compile_opts)\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;124;03mInitialization routine\u001b[39;00m\n\u001b[1;32m 55\u001b[0m \u001b[38;5;124;03m\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;124;03m compile_opts: Pass a list of nvcc compiler options\u001b[39;00m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;66;03m# Call super constructor\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcontext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[43m \u001b[49m\u001b[43mnx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 72\u001b[0m \u001b[43m \u001b[49m\u001b[43mdx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[43m \u001b[49m\u001b[43mboundary_conditions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 74\u001b[0m \u001b[43m \u001b[49m\u001b[43mcfl_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 75\u001b[0m \u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 76\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock_width\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_height\u001b[49m\u001b[43m)\u001b[49m;\n\u001b[1;32m 77\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mg 
\u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mfloat32(g) \n\u001b[1;32m 78\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtheta \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mfloat32(theta) \n",
|
||||
"File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Simulator.py:146\u001b[0m, in \u001b[0;36mBaseSimulator.__init__\u001b[0;34m(self, context, nx, ny, dx, dy, boundary_conditions, cfl_scale, num_substeps, block_width, block_height)\u001b[0m\n\u001b[1;32m 144\u001b[0m \u001b[38;5;66;03m#Handle autotuning block size\u001b[39;00m\n\u001b[1;32m 145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontext\u001b[38;5;241m.\u001b[39mautotuner:\n\u001b[0;32m--> 146\u001b[0m peak_configuration \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontext\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautotuner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_peak_performance\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__class__\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 147\u001b[0m block_width \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(peak_configuration[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 148\u001b[0m block_height \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(peak_configuration[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_height\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n",
|
||||
"File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:121\u001b[0m, in \u001b[0;36mAutotuner.get_peak_performance\u001b[0;34m(self, simulator)\u001b[0m\n\u001b[1;32m 119\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not get autotuned peak performance for \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m: benchmarking\u001b[39m\u001b[38;5;124m\"\u001b[39m, key)\n\u001b[1;32m 120\u001b[0m data\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m--> 121\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbenchmark\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimulator\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 122\u001b[0m data \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfilename)\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfind_max_index\u001b[39m(megacells):\n",
|
||||
"File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:84\u001b[0m, in \u001b[0;36mAutotuner.benchmark\u001b[0;34m(self, simulator, force)\u001b[0m\n\u001b[1;32m 81\u001b[0m benchmark_data[k] \u001b[38;5;241m=\u001b[39m v\n\u001b[1;32m 83\u001b[0m \u001b[38;5;66;03m# Run benchmark\u001b[39;00m\n\u001b[0;32m---> 84\u001b[0m benchmark_data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_megacells\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mAutotuner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbenchmark_single_simulator\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimulator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marguments\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblock_widths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblock_heights\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 85\u001b[0m benchmark_data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_block_widths\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblock_widths\n\u001b[1;32m 86\u001b[0m benchmark_data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_block_heights\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblock_heights\n",
|
||||
"File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:162\u001b[0m, in \u001b[0;36mAutotuner.benchmark_single_simulator\u001b[0;34m(simulator, arguments, block_widths, block_heights)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i, block_width \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(block_widths):\n\u001b[1;32m 161\u001b[0m sim_arguments\u001b[38;5;241m.\u001b[39mupdate({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m'\u001b[39m: block_width})\n\u001b[0;32m--> 162\u001b[0m megacells[j, i] \u001b[38;5;241m=\u001b[39m \u001b[43mAutotuner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_benchmark\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimulator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msim_arguments\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompleted \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m in \u001b[39m\u001b[38;5;132;01m%f\u001b[39;00m\u001b[38;5;124m seconds\u001b[39m\u001b[38;5;124m\"\u001b[39m, simulator\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, t\u001b[38;5;241m.\u001b[39msecs)\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m megacells\n",
|
||||
"File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:200\u001b[0m, in \u001b[0;36mAutotuner.run_benchmark\u001b[0;34m(simulator, arguments, timesteps, warmup_timesteps)\u001b[0m\n\u001b[1;32m 197\u001b[0m end\u001b[38;5;241m.\u001b[39mrecord(sim\u001b[38;5;241m.\u001b[39mstream)\n\u001b[1;32m 199\u001b[0m \u001b[38;5;66;03m#Synchronize end event\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m \u001b[43mend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msynchronize\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;66;03m#Compute megacells\u001b[39;00m\n\u001b[1;32m 203\u001b[0m gpu_elapsed \u001b[38;5;241m=\u001b[39m end\u001b[38;5;241m.\u001b[39mtime_since(start)\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m1.0e-3\u001b[39m\n",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
||||
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)",
|
||||
"Cell \u001B[0;32mIn[10], line 5\u001B[0m\n\u001B[1;32m 2\u001B[0m importlib\u001B[38;5;241m.\u001B[39mreload(KP07)\n\u001B[1;32m 4\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m Common\u001B[38;5;241m.\u001B[39mTimer(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mconstruct\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m t:\n\u001B[0;32m----> 5\u001B[0m sim \u001B[38;5;241m=\u001B[39m \u001B[43mKP07\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mKP07\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43marguments\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 7\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m Common\u001B[38;5;241m.\u001B[39mTimer(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mstep\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m t:\n\u001B[1;32m 8\u001B[0m t \u001B[38;5;241m=\u001B[39m sim\u001B[38;5;241m.\u001B[39msimulate(t_end)\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/KP07.py:70\u001B[0m, in \u001B[0;36mKP07.__init__\u001B[0;34m(self, context, h0, hu0, hv0, nx, ny, dx, dy, g, theta, cfl_scale, order, boundary_conditions, block_width, block_height, dt, compile_opts)\u001B[0m\n\u001B[1;32m 53\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 54\u001B[0m \u001B[38;5;124;03mInitialization routine\u001B[39;00m\n\u001B[1;32m 55\u001B[0m \u001B[38;5;124;03m\u001B[39;00m\n\u001B[0;32m (...)\u001B[0m\n\u001B[1;32m 66\u001B[0m \u001B[38;5;124;03m compile_opts: Pass a list of nvcc compiler options\u001B[39;00m\n\u001B[1;32m 67\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m 69\u001B[0m \u001B[38;5;66;03m# Call super constructor\u001B[39;00m\n\u001B[0;32m---> 70\u001B[0m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[38;5;21;43m__init__\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mcontext\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\n\u001B[1;32m 71\u001B[0m \u001B[43m \u001B[49m\u001B[43mnx\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mny\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\n\u001B[1;32m 72\u001B[0m \u001B[43m \u001B[49m\u001B[43mdx\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdy\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\n\u001B[1;32m 73\u001B[0m \u001B[43m \u001B[49m\u001B[43mboundary_conditions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 74\u001B[0m \u001B[43m \u001B[49m\u001B[43mcfl_scale\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 75\u001B[0m \u001B[43m \u001B[49m\u001B[43morder\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m 76\u001B[0m \u001B[43m \u001B[49m\u001B[43mblock_width\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mblock_height\u001B[49m\u001B[43m)\u001B[49m;\n\u001B[1;32m 77\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mg 
\u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mfloat32(g) \n\u001B[1;32m 78\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtheta \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mfloat32(theta) \n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Simulator.py:146\u001B[0m, in \u001B[0;36mBaseSimulator.__init__\u001B[0;34m(self, context, nx, ny, dx, dy, boundary_conditions, cfl_scale, num_substeps, block_width, block_height)\u001B[0m\n\u001B[1;32m 144\u001B[0m \u001B[38;5;66;03m#Handle autotuning block size\u001B[39;00m\n\u001B[1;32m 145\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcontext\u001B[38;5;241m.\u001B[39mautotuner:\n\u001B[0;32m--> 146\u001B[0m peak_configuration \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcontext\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mautotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_peak_performance\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[38;5;18;43m__class__\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[1;32m 147\u001B[0m block_width \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mint\u001B[39m(peak_configuration[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m\"\u001B[39m])\n\u001B[1;32m 148\u001B[0m block_height \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mint\u001B[39m(peak_configuration[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m\"\u001B[39m])\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:121\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance\u001B[0;34m(self, simulator)\u001B[0m\n\u001B[1;32m 119\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCould not get autotuned peak performance for \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m: benchmarking\u001B[39m\u001B[38;5;124m\"\u001B[39m, key)\n\u001B[1;32m 120\u001B[0m data\u001B[38;5;241m.\u001B[39mclose()\n\u001B[0;32m--> 121\u001B[0m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbenchmark\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 122\u001B[0m data \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mload(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfilename)\n\u001B[1;32m 124\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfind_max_index\u001B[39m(megacells):\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:84\u001B[0m, in \u001B[0;36mAutotuner.benchmark\u001B[0;34m(self, simulator, force)\u001B[0m\n\u001B[1;32m 81\u001B[0m benchmark_data[k] \u001B[38;5;241m=\u001B[39m v\n\u001B[1;32m 83\u001B[0m \u001B[38;5;66;03m# Run benchmark\u001B[39;00m\n\u001B[0;32m---> 84\u001B[0m benchmark_data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m_megacells\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[43mAutotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbenchmark_single_simulator\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43marguments\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mblock_widths\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mblock_heights\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 85\u001B[0m benchmark_data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m_block_widths\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mblock_widths\n\u001B[1;32m 86\u001B[0m benchmark_data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m_block_heights\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mblock_heights\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:162\u001B[0m, in \u001B[0;36mAutotuner.benchmark_single_simulator\u001B[0;34m(simulator, arguments, block_widths, block_heights)\u001B[0m\n\u001B[1;32m 160\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m i, block_width \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(block_widths):\n\u001B[1;32m 161\u001B[0m sim_arguments\u001B[38;5;241m.\u001B[39mupdate({\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m: block_width})\n\u001B[0;32m--> 162\u001B[0m megacells[j, i] \u001B[38;5;241m=\u001B[39m \u001B[43mAutotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun_benchmark\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msim_arguments\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 165\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCompleted \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m in \u001B[39m\u001B[38;5;132;01m%f\u001B[39;00m\u001B[38;5;124m seconds\u001B[39m\u001B[38;5;124m\"\u001B[39m, simulator\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, t\u001B[38;5;241m.\u001B[39msecs)\n\u001B[1;32m 167\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m megacells\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:200\u001B[0m, in \u001B[0;36mAutotuner.run_benchmark\u001B[0;34m(simulator, arguments, timesteps, warmup_timesteps)\u001B[0m\n\u001B[1;32m 197\u001B[0m end\u001B[38;5;241m.\u001B[39mrecord(sim\u001B[38;5;241m.\u001B[39mstream)\n\u001B[1;32m 199\u001B[0m \u001B[38;5;66;03m#Synchronize end event\u001B[39;00m\n\u001B[0;32m--> 200\u001B[0m \u001B[43mend\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43msynchronize\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 202\u001B[0m \u001B[38;5;66;03m#Compute megacells\u001B[39;00m\n\u001B[1;32m 203\u001B[0m gpu_elapsed \u001B[38;5;241m=\u001B[39m end\u001B[38;5;241m.\u001B[39mtime_since(start)\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m1.0e-3\u001B[39m\n",
|
||||
"\u001B[0;31mKeyboardInterrupt\u001B[0m: "
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -34,8 +34,9 @@ from mpi4py import MPI
|
||||
import pycuda.driver as cuda
|
||||
|
||||
# Simulator engine etc
|
||||
from GPUSimulators import MPISimulator, Common
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
from GPUSimulators import MPISimulator
|
||||
from GPUSimulators.common import common
|
||||
from GPUSimulators.gpu import cuda_context
|
||||
from GPUSimulators import EE2D_KP07_dimsplit
|
||||
from GPUSimulators.helpers import InitialConditions as IC
|
||||
|
||||
@ -147,7 +148,7 @@ def genSim(grid, **kwargs):
|
||||
return sim
|
||||
|
||||
|
||||
outfile, sim_runner_profiling_data, sim_profiling_data = Common.runSimulation(
|
||||
outfile, sim_runner_profiling_data, sim_profiling_data = Common.run_simulation(
|
||||
genSim, arguments, outfile, save_times, save_var_names, dt)
|
||||
|
||||
if(args.profile):
|
||||
@ -183,8 +184,8 @@ if(args.profile and MPI.COMM_WORLD.rank == 0):
|
||||
profiling_data["slurm_job_id"] = job_id
|
||||
profiling_data["n_cuda_devices"] = str(num_cuda_devices)
|
||||
profiling_data["n_processes"] = str(MPI.COMM_WORLD.size)
|
||||
profiling_data["git_hash"] = Common.getGitHash()
|
||||
profiling_data["git_status"] = Common.getGitStatus()
|
||||
profiling_data["git_hash"] = Common.get_git_hash()
|
||||
profiling_data["git_status"] = Common.get_git_status()
|
||||
|
||||
with open(profiling_file, "w") as write_file:
|
||||
json.dump(profiling_data, write_file)
|
||||
|
@ -25,7 +25,8 @@ import gc
|
||||
import logging
|
||||
|
||||
#Simulator engine etc
|
||||
from GPUSimulators import SHMEMSimulatorGroup, Common
|
||||
from GPUSimulators import SHMEMSimulatorGroup
|
||||
from GPUSimulators.common import common
|
||||
from GPUSimulators import EE2D_KP07_dimsplit
|
||||
from GPUSimulators.helpers import InitialConditions as IC
|
||||
|
||||
@ -99,7 +100,7 @@ def genSim(sims, grid, **kwargs):
|
||||
sim = SHMEMSimulatorGroup.SHMEMSimulatorGroup(sims, grid)
|
||||
return sim
|
||||
|
||||
outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names)
|
||||
outfile = Common.run_simulation(genSim, arguments, outfile, save_times, save_var_names)
|
||||
|
||||
|
||||
|
||||
|
@ -28,8 +28,8 @@ import logging
|
||||
import pycuda.driver as cuda
|
||||
|
||||
# Simulator engine etc
|
||||
from GPUSimulators import Common
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
from GPUSimulators.common import common
|
||||
from GPUSimulators.gpu import cuda_context
|
||||
from GPUSimulators import EE2D_KP07_dimsplit
|
||||
from GPUSimulators.helpers import InitialConditions as IC
|
||||
|
||||
@ -104,7 +104,7 @@ def genSim(**kwargs):
|
||||
return local_sim
|
||||
|
||||
|
||||
outfile = Common.runSimulation(
|
||||
outfile = Common.run_simulation(
|
||||
genSim, arguments, outfile, save_times, save_var_names)
|
||||
|
||||
####
|
||||
|
Loading…
x
Reference in New Issue
Block a user