Mirror of https://github.com/smyalygames/FiniteVolumeGPU.git (synced 2025-10-31 20:17:41 +01:00)

refactor(kernel): split Common.py to a separate package

Parent: 8f24cd45ea
Commit: c54f08c417
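The diffs below all apply the same refactor: the monolithic GPUSimulators.Common module is split into a GPUSimulators.common package, with the CUDA context moving to GPUSimulators.gpu. A minimal sketch of the resulting import pattern, based on the import hunks shown below; anything beyond what the diff itself shows (e.g. what else the common package re-exports) is an assumption:

    # Before this commit: helpers lived in the monolithic Common module.
    # from GPUSimulators import Common, Simulator
    # with Common.Timer("construct") as t: ...

    # After this commit, following the import hunks below:
    from GPUSimulators import Simulator
    from GPUSimulators.common import common, Timer   # helpers split into a package
    from GPUSimulators.gpu import CudaContext        # CUDA context in its own subpackage

    with Timer("construct") as t:   # Timer keeps its context-manager interface
        pass
    print(t.secs)                   # elapsed seconds, set in Timer.__exit__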
				| @ -27,16 +27,11 @@ | ||||
|     "from matplotlib import pyplot as plt\n", | ||||
|     "from mpl_toolkits.axes_grid1 import make_axes_locatable\n", | ||||
|     "\n", | ||||
|     "import subprocess\n", | ||||
|     "import os\n", | ||||
|     "import gc\n", | ||||
|     "import datetime\n", | ||||
|     "import importlib\n", | ||||
|     "import logging\n", | ||||
|     "from socket import gethostname\n", | ||||
|     "\n", | ||||
|     "import pycuda.driver as cuda\n", | ||||
|     "import pycuda.compiler\n", | ||||
|     "\n", | ||||
|     "try:\n", | ||||
|     "    from StringIO import StringIO\n", | ||||
| @ -55,7 +50,8 @@ | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from GPUSimulators import Common, IPythonMagic, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, Autotuner" | ||||
|     "from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, Autotuner\n", | ||||
|     "from GPUSimulators.common import common" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @ -124,14 +120,14 @@ | ||||
|      "evalue": "All-NaN slice encountered", | ||||
|      "output_type": "error", | ||||
|      "traceback": [ | ||||
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||||
|       "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)", | ||||
|       "Cell \u001b[0;32mIn[9], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m simulators \u001b[38;5;241m=\u001b[39m [LxF\u001b[38;5;241m.\u001b[39mLxF, FORCE\u001b[38;5;241m.\u001b[39mFORCE, HLL\u001b[38;5;241m.\u001b[39mHLL, HLL2\u001b[38;5;241m.\u001b[39mHLL2, KP07\u001b[38;5;241m.\u001b[39mKP07, KP07_dimsplit\u001b[38;5;241m.\u001b[39mKP07_dimsplit, WAF\u001b[38;5;241m.\u001b[39mWAF]\n\u001b[0;32m----> 2\u001b[0m peak_performance \u001b[38;5;241m=\u001b[39m [autotuner\u001b[38;5;241m.\u001b[39mget_peak_performance(simulator) \u001b[38;5;28;01mfor\u001b[39;00m simulator \u001b[38;5;129;01min\u001b[39;00m simulators]\n\u001b[1;32m      3\u001b[0m megacells \u001b[38;5;241m=\u001b[39m [performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmegacells\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m performance \u001b[38;5;129;01min\u001b[39;00m peak_performance]\n\u001b[1;32m      4\u001b[0m xlabels \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{:s}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m[\u001b[39m\u001b[38;5;132;01m{:d}\u001b[39;00m\u001b[38;5;124mx\u001b[39m\u001b[38;5;132;01m{:d}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(simulators[i]\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m'\u001b[39m], performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_height\u001b[39m\u001b[38;5;124m'\u001b[39m]) \u001b[38;5;28;01mfor\u001b[39;00m i, performance \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(peak_performance)]\n", | ||||
|       "Cell \u001b[0;32mIn[9], line 2\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m      1\u001b[0m simulators \u001b[38;5;241m=\u001b[39m [LxF\u001b[38;5;241m.\u001b[39mLxF, FORCE\u001b[38;5;241m.\u001b[39mFORCE, HLL\u001b[38;5;241m.\u001b[39mHLL, HLL2\u001b[38;5;241m.\u001b[39mHLL2, KP07\u001b[38;5;241m.\u001b[39mKP07, KP07_dimsplit\u001b[38;5;241m.\u001b[39mKP07_dimsplit, WAF\u001b[38;5;241m.\u001b[39mWAF]\n\u001b[0;32m----> 2\u001b[0m peak_performance \u001b[38;5;241m=\u001b[39m [\u001b[43mautotuner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_peak_performance\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimulator\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mfor\u001b[39;00m simulator \u001b[38;5;129;01min\u001b[39;00m simulators]\n\u001b[1;32m      3\u001b[0m megacells \u001b[38;5;241m=\u001b[39m [performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmegacells\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m performance \u001b[38;5;129;01min\u001b[39;00m peak_performance]\n\u001b[1;32m      4\u001b[0m xlabels \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{:s}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m[\u001b[39m\u001b[38;5;132;01m{:d}\u001b[39;00m\u001b[38;5;124mx\u001b[39m\u001b[38;5;132;01m{:d}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(simulators[i]\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m'\u001b[39m], performance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_height\u001b[39m\u001b[38;5;124m'\u001b[39m]) \u001b[38;5;28;01mfor\u001b[39;00m i, performance \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(peak_performance)]\n", | ||||
|       "File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:132\u001b[0m, in \u001b[0;36mAutotuner.get_peak_performance\u001b[0;34m(self, simulator)\u001b[0m\n\u001b[1;32m    130\u001b[0m block_widths \u001b[38;5;241m=\u001b[39m data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_block_widths\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m    131\u001b[0m block_heights \u001b[38;5;241m=\u001b[39m data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m_block_heights\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m--> 132\u001b[0m j, i \u001b[38;5;241m=\u001b[39m \u001b[43mfind_max_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmegacells\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    134\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mperformance[key] \u001b[38;5;241m=\u001b[39m { \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m\"\u001b[39m: block_widths[i],\n\u001b[1;32m    135\u001b[0m                          \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_height\u001b[39m\u001b[38;5;124m\"\u001b[39m: block_heights[j],\n\u001b[1;32m    136\u001b[0m                          \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmegacells\u001b[39m\u001b[38;5;124m\"\u001b[39m: megacells[j, i] }\n\u001b[1;32m    137\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mReturning \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m as peak performance parameters\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mperformance[key])\n", | ||||
|       "File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:126\u001b[0m, in \u001b[0;36mAutotuner.get_peak_performance.<locals>.find_max_index\u001b[0;34m(megacells)\u001b[0m\n\u001b[1;32m    125\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfind_max_index\u001b[39m(megacells):\n\u001b[0;32m--> 126\u001b[0m     max_index \u001b[38;5;241m=\u001b[39m \u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnanargmax\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmegacells\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    127\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m np\u001b[38;5;241m.\u001b[39munravel_index(max_index, megacells\u001b[38;5;241m.\u001b[39mshape)\n", | ||||
|       "File \u001b[0;32m~/.conda/envs/ShallowWaterGPU/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:613\u001b[0m, in \u001b[0;36mnanargmax\u001b[0;34m(a, axis, out, keepdims)\u001b[0m\n\u001b[1;32m    611\u001b[0m     mask \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mall(mask, axis\u001b[38;5;241m=\u001b[39maxis)\n\u001b[1;32m    612\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39many(mask):\n\u001b[0;32m--> 613\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAll-NaN slice encountered\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    614\u001b[0m res \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39margmax(a, axis\u001b[38;5;241m=\u001b[39maxis, out\u001b[38;5;241m=\u001b[39mout, keepdims\u001b[38;5;241m=\u001b[39mkeepdims)\n\u001b[1;32m    615\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", | ||||
|       "\u001b[0;31mValueError\u001b[0m: All-NaN slice encountered" | ||||
|       "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", | ||||
|       "\u001B[0;31mValueError\u001B[0m                                Traceback (most recent call last)", | ||||
|       "Cell \u001B[0;32mIn[9], line 2\u001B[0m\n\u001B[1;32m      1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [autotuner\u001B[38;5;241m.\u001B[39mget_peak_performance(simulator) \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m      3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m      4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n", | ||||
|       "Cell \u001B[0;32mIn[9], line 2\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m      1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [\u001B[43mautotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_peak_performance\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m      3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m      4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:132\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance\u001B[0;34m(self, simulator)\u001B[0m\n\u001B[1;32m    130\u001B[0m block_widths \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_widths\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[1;32m    131\u001B[0m block_heights \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_heights\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[0;32m--> 132\u001B[0m j, i \u001B[38;5;241m=\u001B[39m \u001B[43mfind_max_index\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    134\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key] \u001B[38;5;241m=\u001B[39m { \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_widths[i],\n\u001B[1;32m    135\u001B[0m                          \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_heights[j],\n\u001B[1;32m    136\u001B[0m                          \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m\"\u001B[39m: megacells[j, i] }\n\u001B[1;32m    137\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mReturning \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m as peak performance parameters\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key])\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:126\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance.<locals>.find_max_index\u001B[0;34m(megacells)\u001B[0m\n\u001B[1;32m    125\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfind_max_index\u001B[39m(megacells):\n\u001B[0;32m--> 126\u001B[0m     max_index \u001B[38;5;241m=\u001B[39m \u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnanargmax\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    127\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m np\u001B[38;5;241m.\u001B[39munravel_index(max_index, megacells\u001B[38;5;241m.\u001B[39mshape)\n", | ||||
|       "File \u001B[0;32m~/.conda/envs/ShallowWaterGPU/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:613\u001B[0m, in \u001B[0;36mnanargmax\u001B[0;34m(a, axis, out, keepdims)\u001B[0m\n\u001B[1;32m    611\u001B[0m     mask \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mall(mask, axis\u001B[38;5;241m=\u001B[39maxis)\n\u001B[1;32m    612\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m np\u001B[38;5;241m.\u001B[39many(mask):\n\u001B[0;32m--> 613\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAll-NaN slice encountered\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m    614\u001B[0m res \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39margmax(a, axis\u001B[38;5;241m=\u001B[39maxis, out\u001B[38;5;241m=\u001B[39mout, keepdims\u001B[38;5;241m=\u001B[39mkeepdims)\n\u001B[1;32m    615\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m res\n", | ||||
|       "\u001B[0;31mValueError\u001B[0m: All-NaN slice encountered" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|  | ||||
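The stored notebook output above records a ValueError from Autotuner.get_peak_performance: np.nanargmax raises "All-NaN slice encountered" when every benchmark for a simulator returned np.nan (run_benchmark returns np.nan whenever construction or the sanity check fails, see the Autotuner.py diff further down). A guarded variant of find_max_index, shown here only as a sketch and not part of this commit:

    import numpy as np

    def find_max_index(megacells):
        """Return (j, i) of the fastest configuration, or None if every benchmark failed."""
        if np.all(np.isnan(megacells)):
            # Every configuration returned np.nan, e.g. all kernels failed to build or run.
            return None
        max_index = np.nanargmax(megacells)
        return np.unravel_index(max_index, megacells.shape)

    # Example: a 2x2 result grid where only one configuration succeeded.
    print(find_max_index(np.array([[np.nan, 12.5], [np.nan, np.nan]])))  # -> (0, 1)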
| @ -49,9 +49,6 @@ | ||||
|     "import time\n", | ||||
|     "import os\n", | ||||
|     "import gc\n", | ||||
|     "import datetime\n", | ||||
|     "\n", | ||||
|     "import pycuda.driver as cuda\n", | ||||
|     "\n", | ||||
|     "try:\n", | ||||
|     "    from StringIO import StringIO\n", | ||||
| @ -59,7 +56,8 @@ | ||||
|     "    from io import StringIO\n", | ||||
|     "\n", | ||||
|     "#Finally, import our simulator\n", | ||||
|     "from GPUSimulators import Common, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, IPythonMagic" | ||||
|     "from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF\n", | ||||
|     "from GPUSimulators.common import common" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | ||||
| @ -42,15 +42,10 @@ | ||||
|     "from mpl_toolkits.axes_grid1 import make_axes_locatable\n", | ||||
|     "#import mpld3\n", | ||||
|     "\n", | ||||
|     "import subprocess\n", | ||||
|     "import socket\n", | ||||
|     "import time\n", | ||||
|     "import os\n", | ||||
|     "import gc\n", | ||||
|     "import datetime\n", | ||||
|     "import logging\n", | ||||
|     "\n", | ||||
|     "import pycuda.driver as cuda\n", | ||||
|     "\n", | ||||
|     "try:\n", | ||||
|     "    from StringIO import StringIO\n", | ||||
| @ -65,7 +60,8 @@ | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Finally, import our simulator\n", | ||||
|     "from GPUSimulators import Common, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, IPythonMagic\n", | ||||
|     "from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF\n", | ||||
|     "from GPUSimulators.common import common\n", | ||||
|     "from GPUSimulators.helpers import InitialConditions" | ||||
|    ] | ||||
|   }, | ||||
| @ -250,8 +246,8 @@ | ||||
|     "            sim.simulate(1.0, dt=dt)\n", | ||||
|     "            sim.check()\n", | ||||
|     "            \n", | ||||
|     "            nt = sim.simSteps()\n", | ||||
|     "            dt = sim.simTime() / nt\n", | ||||
|     "            nt = sim.sim_steps()\n", | ||||
|     "            dt = sim.sim_time() / nt\n", | ||||
|     "            h, hu, hv = sim.download()\n", | ||||
|     "            \n", | ||||
|     "            if (transpose):\n", | ||||
|  | ||||
| @ -42,15 +42,10 @@ | ||||
|     "from mpl_toolkits.axes_grid1 import make_axes_locatable\n", | ||||
|     "#import mpld3\n", | ||||
|     "\n", | ||||
|     "import subprocess\n", | ||||
|     "import socket\n", | ||||
|     "import time\n", | ||||
|     "import os\n", | ||||
|     "import gc\n", | ||||
|     "import datetime\n", | ||||
|     "import logging\n", | ||||
|     "\n", | ||||
|     "import pycuda.driver as cuda\n", | ||||
|     "\n", | ||||
|     "try:\n", | ||||
|     "    from StringIO import StringIO\n", | ||||
| @ -65,7 +60,8 @@ | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "#Finally, import our simulator\n", | ||||
|     "from GPUSimulators import Common, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, IPythonMagic\n", | ||||
|     "from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF\n", | ||||
|     "from GPUSimulators.common import common\n", | ||||
|     "from GPUSimulators.helpers import InitialConditions" | ||||
|    ] | ||||
|   }, | ||||
| @ -250,8 +246,8 @@ | ||||
|     "            sim.simulate(1.0, dt=dt)\n", | ||||
|     "            sim.check()\n", | ||||
|     "            \n", | ||||
|     "            nt = sim.simSteps()\n", | ||||
|     "            dt = sim.simTime() / nt\n", | ||||
|     "            nt = sim.sim_steps()\n", | ||||
|     "            dt = sim.sim_time() / nt\n", | ||||
|     "            h, hu, hv = sim.download()\n", | ||||
|     "            \n", | ||||
|     "            if (transpose):\n", | ||||
|  | ||||
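Besides the import change, both notebook diffs above pick up the accessor rename from camelCase to snake_case (simSteps/simTime become sim_steps/sim_time). A minimal sketch of the updated call pattern, assuming a simulator instance sim and an initial dt set up as in the notebook cells (the construction arguments are not shown here):

    # Hypothetical loop body after the rename; mirrors the notebook cells above.
    sim.simulate(1.0, dt=dt)     # advance the solution to t = 1.0
    sim.check()                  # sanity-check the internal state

    nt = sim.sim_steps()         # number of timesteps taken (was simSteps())
    dt = sim.sim_time() / nt     # average timestep size (was simTime())
    h, hu, hv = sim.download()   # download conserved variables from the GPU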
EulerTesting.ipynb (141864): file diff suppressed because one or more lines are too long.
							| @ -29,15 +29,159 @@ from tqdm.auto import tqdm | ||||
| 
 | ||||
| import pycuda.driver as cuda | ||||
| 
 | ||||
| from GPUSimulators import Common, Simulator | ||||
| from GPUSimulators import Simulator | ||||
| from GPUSimulators.common import common, Timer | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| 
 | ||||
| 
 | ||||
| def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2): | ||||
|     """ | ||||
|     Runs a benchmark, and returns the number of megacells achieved | ||||
|     """ | ||||
| 
 | ||||
|     logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|     # Initialize simulator | ||||
|     try: | ||||
|         sim = simulator(**arguments) | ||||
|     except: | ||||
|         # An exception raised - not possible to continue | ||||
|         logger.debug("Failed creating %s with arguments %s", simulator.__name__, str(arguments)) | ||||
|         # raise RuntimeError("Failed creating %s with arguments %s", simulator.__name__, str(arguments)) | ||||
|         return np.nan | ||||
| 
 | ||||
|     # Create timer events | ||||
|     start = cuda.Event() | ||||
|     end = cuda.Event() | ||||
| 
 | ||||
|     # Warmup | ||||
|     for i in range(warmup_timesteps): | ||||
|         sim.substep(sim.dt, i) | ||||
| 
 | ||||
|     # Run simulation with timer | ||||
|     start.record(sim.stream) | ||||
|     for i in range(timesteps): | ||||
|         sim.substep(sim.dt, i) | ||||
|     end.record(sim.stream) | ||||
| 
 | ||||
|     # Synchronize end event | ||||
|     end.synchronize() | ||||
| 
 | ||||
|     # Compute megacells | ||||
|     gpu_elapsed = end.time_since(start) * 1.0e-3 | ||||
|     megacells = (sim.nx * sim.ny * timesteps / (1000 * 1000)) / gpu_elapsed | ||||
| 
 | ||||
|     # Sanity check solution | ||||
|     h, hu, hv = sim.download() | ||||
|     sane = True | ||||
|     sane = sane and sanity_check(h, 0.3, 0.7) | ||||
|     sane = sane and sanity_check(hu, -0.2, 0.2) | ||||
|     sane = sane and sanity_check(hv, -0.2, 0.2) | ||||
| 
 | ||||
|     if sane: | ||||
|         logger.debug("%s [%d x %d] succeeded: %f megacells, gpu elapsed %f", simulator.__name__, | ||||
|                      arguments["block_width"], arguments["block_height"], megacells, gpu_elapsed) | ||||
|         return megacells | ||||
|     else: | ||||
|         logger.debug("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], | ||||
|                      arguments["block_height"], gpu_elapsed) | ||||
|         # raise RuntimeError("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed) | ||||
|         return np.nan | ||||
| 
 | ||||
| 
 | ||||
| def gen_test_data(nx, ny, g): | ||||
|     """ | ||||
|     Generates test dataset | ||||
|     """ | ||||
| 
 | ||||
|     width = 100.0 | ||||
|     height = 100.0 | ||||
|     dx = width / float(nx) | ||||
|     dy = height / float(ny) | ||||
| 
 | ||||
|     x_center = dx * nx / 2.0 | ||||
|     y_center = dy * ny / 2.0 | ||||
| 
 | ||||
|     # Create a gaussian "dam break" that will not form shocks | ||||
|     size = width / 5.0 | ||||
|     dt = 10 ** 10 | ||||
| 
 | ||||
|     h = np.zeros((ny, nx), dtype=np.float32) | ||||
|     hu = np.zeros((ny, nx), dtype=np.float32) | ||||
|     hv = np.zeros((ny, nx), dtype=np.float32) | ||||
| 
 | ||||
|     extent = 1.0 / np.sqrt(2.0) | ||||
|     x = (dx * (np.arange(0, nx, dtype=np.float32) + 0.5) - x_center) / size | ||||
|     y = (dy * (np.arange(0, ny, dtype=np.float32) + 0.5) - y_center) / size | ||||
|     xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy') | ||||
|     r = np.minimum(1.0, np.sqrt(xv ** 2 + yv ** 2)) | ||||
|     xv = None | ||||
|     yv = None | ||||
|     gc.collect() | ||||
| 
 | ||||
|     # Generate highres | ||||
|     cos = np.cos(np.pi * r) | ||||
|     h = 0.5 + 0.1 * 0.5 * (1.0 + cos) | ||||
|     hu = 0.1 * 0.5 * (1.0 + cos) | ||||
|     hv = hu.copy() | ||||
| 
 | ||||
|     scale = 0.7 | ||||
|     max_h_estimate = 0.6 | ||||
|     max_u_estimate = 0.1 * np.sqrt(2.0) | ||||
|     dx = width / nx | ||||
|     dy = height / ny | ||||
|     dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(g * max_h_estimate)) | ||||
| 
 | ||||
|     return h, hu, hv, dx, dy, dt | ||||
| 
 | ||||
| 
 | ||||
| def sanity_check(variable, bound_min, bound_max): | ||||
|     """ | ||||
|     Checks that a variable is "sane" | ||||
|     """ | ||||
| 
 | ||||
|     maxval = np.amax(variable) | ||||
|     minval = np.amin(variable) | ||||
|     if (np.isnan(maxval) | ||||
|             or np.isnan(minval) | ||||
|             or maxval > bound_max | ||||
|             or minval < bound_min): | ||||
|         return False | ||||
|     else: | ||||
|         return True | ||||
| 
 | ||||
| 
 | ||||
| def benchmark_single_simulator(simulator, arguments, block_widths, block_heights): | ||||
|     """ | ||||
|     Runs a set of benchmarks for a single simulator | ||||
|     """ | ||||
| 
 | ||||
|     logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|     megacells = np.empty((len(block_heights), len(block_widths))) | ||||
|     megacells.fill(np.nan) | ||||
| 
 | ||||
|     logger.debug("Running %d benchmarks with %s", len(block_heights) * len(block_widths), simulator.__name__) | ||||
| 
 | ||||
|     sim_arguments = arguments.copy() | ||||
| 
 | ||||
|     with Timer(simulator.__name__) as t: | ||||
|         for j, block_height in enumerate(tqdm(block_heights, desc='Autotuner Progress')): | ||||
|             sim_arguments.update({'block_height': block_height}) | ||||
|             for i, block_width in enumerate(tqdm(block_widths, desc=f'Iteration {j} Progress', leave=False)): | ||||
|                 sim_arguments.update({'block_width': block_width}) | ||||
|                 megacells[j, i] = run_benchmark(simulator, sim_arguments) | ||||
| 
 | ||||
|     logger.debug("Completed %s in %f seconds", simulator.__name__, t.secs) | ||||
| 
 | ||||
|     return megacells | ||||
| 
 | ||||
| 
 | ||||
| class Autotuner: | ||||
|     def __init__(self,  | ||||
|                 nx=2048, ny=2048,  | ||||
|                 block_widths=range(8, 32, 1), | ||||
|                 block_heights=range(8, 32, 1)): | ||||
|     def __init__(self, | ||||
|                  nx=2048, ny=2048, | ||||
|                  block_widths=range(8, 32, 1), | ||||
|                  block_heights=range(8, 32, 1)): | ||||
|         logger = logging.getLogger(__name__) | ||||
|         self.filename = "autotuning_data_" + gethostname() + ".npz" | ||||
|         self.nx = nx | ||||
| @ -48,50 +192,51 @@ class Autotuner: | ||||
| 
 | ||||
|     def benchmark(self, simulator, force=False): | ||||
|         logger = logging.getLogger(__name__) | ||||
|          | ||||
|         #Run through simulators and benchmark | ||||
| 
 | ||||
|         # Run through simulators and benchmark | ||||
|         key = str(simulator.__name__) | ||||
|         logger.info("Benchmarking %s to %s", key, self.filename) | ||||
|          | ||||
|         #If this simulator has been benchmarked already, skip it | ||||
|         if (force==False and os.path.isfile(self.filename)): | ||||
| 
 | ||||
|         # If this simulator has been benchmarked already, skip it | ||||
|         if force == False and os.path.isfile(self.filename): | ||||
|             with np.load(self.filename) as data: | ||||
|                 if key in data["simulators"]: | ||||
|                     logger.info("%s already benchmarked - skipping", key) | ||||
|                     return | ||||
|      | ||||
| 
 | ||||
|         # Set arguments to send to the simulators during construction | ||||
|         context = CudaContext.CudaContext(autotuning=False) | ||||
|         context = CudaContext(autotuning=False) | ||||
|         g = 9.81 | ||||
|         h0, hu0, hv0, dx, dy, dt = Autotuner.gen_test_data(nx=self.nx, ny=self.ny, g=g) | ||||
|         h0, hu0, hv0, dx, dy, dt = gen_test_data(nx=self.nx, ny=self.ny, g=g) | ||||
|         arguments = { | ||||
|             'context': context, | ||||
|             'h0': h0, 'hu0': hu0, 'hv0': hv0, | ||||
|             'nx': self.nx, 'ny': self.ny, | ||||
|             'dx': dx, 'dy': dy, 'dt': 0.9*dt, | ||||
|             'dx': dx, 'dy': dy, 'dt': 0.9 * dt, | ||||
|             'g': g, | ||||
|             'compile_opts': ['-Wno-deprecated-gpu-targets'] | ||||
|         }  | ||||
|               | ||||
|         } | ||||
| 
 | ||||
|         # Load existing data into memory | ||||
|         benchmark_data = { | ||||
|                 "simulators": [], | ||||
|             "simulators": [], | ||||
|         } | ||||
|         if (os.path.isfile(self.filename)): | ||||
|         if os.path.isfile(self.filename): | ||||
|             with np.load(self.filename) as data: | ||||
|                 for k, v in data.items(): | ||||
|                     benchmark_data[k] = v | ||||
|     | ||||
| 
 | ||||
|         # Run benchmark | ||||
|         benchmark_data[key + "_megacells"] = Autotuner.benchmark_single_simulator(simulator, arguments, self.block_widths, self.block_heights) | ||||
|         benchmark_data[key + "_megacells"] = benchmark_single_simulator(simulator, arguments, | ||||
|                                                                         self.block_widths, self.block_heights) | ||||
|         benchmark_data[key + "_block_widths"] = self.block_widths | ||||
|         benchmark_data[key + "_block_heights"] = self.block_heights | ||||
|         benchmark_data[key + "_arguments"] = str(arguments) | ||||
|          | ||||
| 
 | ||||
|         existing_sims = benchmark_data["simulators"] | ||||
|         if (isinstance(existing_sims, np.ndarray)): | ||||
|         if isinstance(existing_sims, np.ndarray): | ||||
|             existing_sims = existing_sims.tolist() | ||||
|         if (key not in existing_sims): | ||||
|         if key not in existing_sims: | ||||
|             benchmark_data["simulators"] = existing_sims + [key] | ||||
| 
 | ||||
|         # Save to file | ||||
| @ -104,178 +249,40 @@ class Autotuner: | ||||
|         """ | ||||
| 
 | ||||
|         logger = logging.getLogger(__name__) | ||||
|          | ||||
| 
 | ||||
|         assert issubclass(simulator, Simulator.BaseSimulator) | ||||
|         key = simulator.__name__ | ||||
|          | ||||
|         if (key in self.performance): | ||||
| 
 | ||||
|         if key in self.performance: | ||||
|             return self.performance[key] | ||||
|         else: | ||||
|             #Run simulation if required | ||||
|             if (not os.path.isfile(self.filename)): | ||||
|             # Run simulation if required | ||||
|             if not os.path.isfile(self.filename): | ||||
|                 logger.debug("Could not get autotuned peak performance for %s: benchmarking", key) | ||||
|                 self.benchmark(simulator) | ||||
|              | ||||
| 
 | ||||
|             with np.load(self.filename) as data: | ||||
|                 if key not in data['simulators']: | ||||
|                     logger.debug("Could not get autotuned peak performance for %s: benchmarking", key) | ||||
|                     data.close() | ||||
|                     self.benchmark(simulator) | ||||
|                     data = np.load(self.filename) | ||||
|                  | ||||
| 
 | ||||
|                 def find_max_index(megacells): | ||||
|                     max_index = np.nanargmax(megacells) | ||||
|                     return np.unravel_index(max_index, megacells.shape) | ||||
|                  | ||||
| 
 | ||||
|                 megacells = data[key + '_megacells'] | ||||
|                 block_widths = data[key + '_block_widths'] | ||||
|                 block_heights = data[key + '_block_heights'] | ||||
|                 j, i = find_max_index(megacells) | ||||
|                  | ||||
|                 self.performance[key] = { "block_width": block_widths[i], | ||||
| 
 | ||||
|                 self.performance[key] = {"block_width": block_widths[i], | ||||
|                                          "block_height": block_heights[j], | ||||
|                                          "megacells": megacells[j, i] } | ||||
|                                          "megacells": megacells[j, i]} | ||||
|                 logger.debug("Returning %s as peak performance parameters", self.performance[key]) | ||||
|                 return self.performance[key] | ||||
|          | ||||
|             #This should never happen | ||||
| 
 | ||||
|             # This should never happen | ||||
|             raise RuntimeError("Something wrong: Could not get autotuning data!") | ||||
|             return None | ||||
|      | ||||
|     def benchmark_single_simulator(simulator, arguments, block_widths, block_heights): | ||||
|         """ | ||||
|         Runs a set of benchmarks for a single simulator | ||||
|         """ | ||||
| 
 | ||||
|         logger = logging.getLogger(__name__) | ||||
|          | ||||
|         megacells = np.empty((len(block_heights), len(block_widths))) | ||||
|         megacells.fill(np.nan) | ||||
| 
 | ||||
|         logger.debug("Running %d benchmarks with %s", len(block_heights)*len(block_widths), simulator.__name__) | ||||
|          | ||||
|         sim_arguments = arguments.copy() | ||||
|                      | ||||
|         with Common.Timer(simulator.__name__) as t: | ||||
|             for j, block_height in enumerate(tqdm(block_heights, desc='Autotuner Progress')): | ||||
|                 sim_arguments.update({'block_height': block_height}) | ||||
|                 for i, block_width in enumerate(tqdm(block_widths, desc=f'Iteration {j} Progress', leave=False)): | ||||
|                     sim_arguments.update({'block_width': block_width}) | ||||
|                     megacells[j, i] = Autotuner.run_benchmark(simulator, sim_arguments) | ||||
|                          | ||||
| 
 | ||||
|         logger.debug("Completed %s in %f seconds", simulator.__name__, t.secs) | ||||
| 
 | ||||
|         return megacells | ||||
|              | ||||
|     def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2): | ||||
|         """ | ||||
|         Runs a benchmark, and returns the number of megacells achieved | ||||
|         """ | ||||
| 
 | ||||
|         logger = logging.getLogger(__name__) | ||||
|          | ||||
|         #Initialize simulator | ||||
|         try: | ||||
|             sim = simulator(**arguments) | ||||
|         except: | ||||
|             #An exception raised - not possible to continue | ||||
|             logger.debug("Failed creating %s with arguments %s", simulator.__name__, str(arguments)) | ||||
|             # raise RuntimeError("Failed creating %s with arguments %s", simulator.__name__, str(arguments)) | ||||
|             return np.nan | ||||
|          | ||||
|         #Create timer events | ||||
|         start = cuda.Event() | ||||
|         end = cuda.Event() | ||||
|          | ||||
|         #Warmup | ||||
|         for i in range(warmup_timesteps): | ||||
|             sim.substep(sim.dt, i) | ||||
|              | ||||
|         #Run simulation with timer         | ||||
|         start.record(sim.stream) | ||||
|         for i in range(timesteps): | ||||
|             sim.substep(sim.dt, i) | ||||
|         end.record(sim.stream) | ||||
|          | ||||
|         #Synchronize end event | ||||
|         end.synchronize() | ||||
|          | ||||
|         #Compute megacells | ||||
|         gpu_elapsed = end.time_since(start)*1.0e-3 | ||||
|         megacells = (sim.nx*sim.ny*timesteps / (1000*1000)) / gpu_elapsed | ||||
| 
 | ||||
|         #Sanity check solution | ||||
|         h, hu, hv = sim.download() | ||||
|         sane = True | ||||
|         sane = sane and Autotuner.sanity_check(h, 0.3, 0.7) | ||||
|         sane = sane and Autotuner.sanity_check(hu, -0.2, 0.2) | ||||
|         sane = sane and Autotuner.sanity_check(hv, -0.2, 0.2) | ||||
|          | ||||
|         if (sane): | ||||
|             logger.debug("%s [%d x %d] succeeded: %f megacells, gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], megacells, gpu_elapsed) | ||||
|             return megacells | ||||
|         else: | ||||
|             logger.debug("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed) | ||||
|             # raise RuntimeError("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed) | ||||
|             return np.nan | ||||
|          | ||||
|     def gen_test_data(nx, ny, g): | ||||
|         """ | ||||
|         Generates test dataset | ||||
|         """ | ||||
| 
 | ||||
|         width = 100.0 | ||||
|         height = 100.0 | ||||
|         dx = width / float(nx) | ||||
|         dy = height / float(ny) | ||||
| 
 | ||||
|         x_center = dx*nx/2.0 | ||||
|         y_center = dy*ny/2.0 | ||||
| 
 | ||||
|         #Create a gaussian "dam break" that will not form shocks | ||||
|         size = width / 5.0 | ||||
|         dt = 10**10 | ||||
|          | ||||
|         h  = np.zeros((ny, nx), dtype=np.float32);  | ||||
|         hu = np.zeros((ny, nx), dtype=np.float32); | ||||
|         hv = np.zeros((ny, nx), dtype=np.float32); | ||||
| 
 | ||||
|         extent = 1.0/np.sqrt(2.0) | ||||
|         x = (dx*(np.arange(0, nx, dtype=np.float32)+0.5) - x_center) / size | ||||
|         y = (dy*(np.arange(0, ny, dtype=np.float32)+0.5) - y_center) / size | ||||
|         xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy') | ||||
|         r = np.minimum(1.0, np.sqrt(xv**2 + yv**2)) | ||||
|         xv = None | ||||
|         yv = None | ||||
|         gc.collect() | ||||
| 
 | ||||
|         #Generate highres | ||||
|         cos = np.cos(np.pi*r) | ||||
|         h = 0.5 + 0.1*0.5*(1.0 + cos) | ||||
|         hu = 0.1*0.5*(1.0 + cos) | ||||
|         hv = hu.copy() | ||||
|          | ||||
|         scale = 0.7 | ||||
|         max_h_estimate = 0.6 | ||||
|         max_u_estimate = 0.1*np.sqrt(2.0) | ||||
|         dx = width/nx | ||||
|         dy = height/ny | ||||
|         dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(g*max_h_estimate)) | ||||
|          | ||||
|         return h, hu, hv, dx, dy, dt | ||||
|          | ||||
|     def sanity_check(variable, bound_min, bound_max): | ||||
|         """ | ||||
|         Checks that a variable is "sane" | ||||
|         """ | ||||
| 
 | ||||
|         maxval = np.amax(variable) | ||||
|         minval = np.amin(variable) | ||||
|         if (np.isnan(maxval)  | ||||
|                 or np.isnan(minval) | ||||
|                 or maxval > bound_max | ||||
|                 or minval < bound_min): | ||||
|             return False | ||||
|         else: | ||||
|             return True | ||||
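The Autotuner.py diff above turns the former static methods into module-level helpers (run_benchmark, gen_test_data, sanity_check, benchmark_single_simulator), while the Autotuner class keeps benchmark() and get_peak_performance() and caches results in autotuning_data_<hostname>.npz. A minimal usage sketch, assuming a working CUDA device and that the package is importable as in the notebooks above; the driver code itself is not part of this commit:

    from GPUSimulators import LxF, Autotuner

    autotuner = Autotuner.Autotuner(nx=2048, ny=2048)   # defaults from the constructor in the diff
    autotuner.benchmark(LxF.LxF)                        # runs the block-size sweep and saves the .npz
    best = autotuner.get_peak_performance(LxF.LxF)      # dict with block_width, block_height, megacells
    print(best["block_width"], best["block_height"], best["megacells"])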
| @ -1,758 +0,0 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| """ | ||||
| This python module implements the different helper functions and  | ||||
| classes | ||||
| 
 | ||||
| Copyright (C) 2018  SINTEF ICT | ||||
| 
 | ||||
| This program is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| import os | ||||
| 
 | ||||
| import numpy as np | ||||
| import time | ||||
| import signal | ||||
| import subprocess | ||||
| import tempfile | ||||
| import re | ||||
| import io | ||||
| import hashlib | ||||
| import logging | ||||
| import gc | ||||
| import netCDF4 | ||||
| import json | ||||
| 
 | ||||
| import pycuda.compiler as cuda_compiler | ||||
| import pycuda.gpuarray | ||||
| import pycuda.driver as cuda | ||||
| from pycuda.tools import PageLockedMemoryPool | ||||
| 
 | ||||
| 
 | ||||
| def safeCall(cmd): | ||||
|     logger = logging.getLogger(__name__) | ||||
|     try: | ||||
|         #git rev-parse HEAD | ||||
|         current_dir = os.path.dirname(os.path.realpath(__file__)) | ||||
|         params = dict() | ||||
|         params['stderr'] = subprocess.STDOUT | ||||
|         params['cwd'] = current_dir | ||||
|         params['universal_newlines'] = True #text=True in more recent python | ||||
|         params['shell'] = False | ||||
|         if os.name == 'nt': | ||||
|             params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP | ||||
|         stdout = subprocess.check_output(cmd, **params) | ||||
|     except subprocess.CalledProcessError as e: | ||||
|         output = e.output | ||||
|         logger.error("Git failed, \nReturn code: " + str(e.returncode) + "\nOutput: " + output) | ||||
|         raise e | ||||
| 
 | ||||
|     return stdout | ||||
| 
 | ||||
| 
 | ||||
| def getGitHash(): | ||||
|     return safeCall(["git", "rev-parse", "HEAD"]) | ||||
| 
 | ||||
| 
 | ||||
| def getGitStatus(): | ||||
|     return safeCall(["git", "status", "--porcelain", "-uno"]) | ||||
| 
 | ||||
| 
 | ||||
| def toJson(in_dict, compressed=True): | ||||
|     """ | ||||
|     Creates JSON string from a dictionary | ||||
|     """ | ||||
| 
 | ||||
|     logger = logging.getLogger(__name__) | ||||
|     out_dict = in_dict.copy() | ||||
|     for key in out_dict: | ||||
|         if isinstance(out_dict[key], np.ndarray): | ||||
|             out_dict[key] = out_dict[key].tolist() | ||||
|         else: | ||||
|             try: | ||||
|                 json.dumps(out_dict[key]) | ||||
|             except: | ||||
|                 value = str(out_dict[key]) | ||||
|                 logger.warning("JSON: Converting {:s} to string ({:s})".format(key, value)) | ||||
|                 out_dict[key] = value | ||||
|     return json.dumps(out_dict) | ||||
| 
 | ||||
| 
 | ||||
| def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names=[], dt=None): | ||||
|     """ | ||||
|     Runs a simulation, and stores output in netcdf file. Stores the times given in  | ||||
|     save_times, and saves all of the variables in list save_var_names. Elements in   | ||||
|     save_var_names can be set to None if you do not want to save them | ||||
|     """ | ||||
| 
 | ||||
|     profiling_data_sim_runner = { 'start': {}, 'end': {} } | ||||
|     profiling_data_sim_runner["start"]["t_sim_init"] = 0 | ||||
|     profiling_data_sim_runner["end"]["t_sim_init"] = 0 | ||||
|     profiling_data_sim_runner["start"]["t_nc_write"] = 0 | ||||
|     profiling_data_sim_runner["end"]["t_nc_write"] = 0 | ||||
|     profiling_data_sim_runner["start"]["t_full_step"] = 0 | ||||
|     profiling_data_sim_runner["end"]["t_full_step"] = 0 | ||||
| 
 | ||||
|     profiling_data_sim_runner["start"]["t_sim_init"] = time.time() | ||||
| 
 | ||||
|     logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|     assert len(save_times) > 0, "Need to specify which times to save" | ||||
| 
 | ||||
|     with Timer("construct") as t: | ||||
|         sim = simulator(**simulator_args) | ||||
|     logger.info("Constructed in " + str(t.secs) + " seconds") | ||||
| 
 | ||||
|     #Create netcdf file and simulate | ||||
|     with DataDumper(outfile, mode='w', clobber=False) as outdata: | ||||
|          | ||||
|         #Create attributes (metadata) | ||||
|         outdata.ncfile.created = time.ctime(time.time()) | ||||
|         outdata.ncfile.git_hash = getGitHash() | ||||
|         outdata.ncfile.git_status = getGitStatus() | ||||
|         outdata.ncfile.simulator = str(simulator) | ||||
|          | ||||
|         # do not write fields to attributes (they are to large) | ||||
|         simulator_args_for_ncfile = simulator_args.copy() | ||||
|         del simulator_args_for_ncfile["rho"] | ||||
|         del simulator_args_for_ncfile["rho_u"] | ||||
|         del simulator_args_for_ncfile["rho_v"] | ||||
|         del simulator_args_for_ncfile["E"] | ||||
|         outdata.ncfile.sim_args = toJson(simulator_args_for_ncfile) | ||||
|          | ||||
|         #Create dimensions | ||||
|         outdata.ncfile.createDimension('time', len(save_times)) | ||||
|         outdata.ncfile.createDimension('x', simulator_args['nx']) | ||||
|         outdata.ncfile.createDimension('y', simulator_args['ny']) | ||||
| 
 | ||||
|         #Create variables for dimensions | ||||
|         ncvars = {} | ||||
|         ncvars['time'] = outdata.ncfile.createVariable('time', np.dtype('float32').char, 'time') | ||||
|         ncvars['x']    = outdata.ncfile.createVariable(   'x', np.dtype('float32').char,    'x') | ||||
|         ncvars['y']    = outdata.ncfile.createVariable(   'y', np.dtype('float32').char,    'y') | ||||
|          | ||||
|         #Fill variables with proper values | ||||
|         ncvars['time'][:] = save_times | ||||
|         extent = sim.getExtent() | ||||
|         ncvars['x'][:] = np.linspace(extent[0], extent[1], simulator_args['nx']) | ||||
|         ncvars['y'][:] = np.linspace(extent[2], extent[3], simulator_args['ny']) | ||||
|          | ||||
|         #Choose which variables to download (prune None from list, but keep the index) | ||||
|         download_vars = [] | ||||
|         for i, var_name in enumerate(save_var_names): | ||||
|             if var_name is not None: | ||||
|                 download_vars += [i] | ||||
|         save_var_names = list(save_var_names[i] for i in download_vars) | ||||
|          | ||||
|         #Create variables | ||||
|         for var_name in save_var_names: | ||||
|             ncvars[var_name] = outdata.ncfile.createVariable(var_name, np.dtype('float32').char, ('time', 'y', 'x'), zlib=True, least_significant_digit=3) | ||||
| 
 | ||||
|         #Create step sizes between each save | ||||
|         t_steps = np.empty_like(save_times) | ||||
|         t_steps[0] = save_times[0] | ||||
|         t_steps[1:] = save_times[1:] - save_times[0:-1] | ||||
| 
 | ||||
|         profiling_data_sim_runner["end"]["t_sim_init"] = time.time() | ||||
| 
 | ||||
|         #Start simulation loop | ||||
|         progress_printer = ProgressPrinter(save_times[-1], print_every=10) | ||||
|         for k in range(len(save_times)): | ||||
|             #Get target time and step size there | ||||
|             t_step = t_steps[k] | ||||
|             t_end = save_times[k] | ||||
|              | ||||
|             #Sanity check simulator | ||||
|             try: | ||||
|                 sim.check() | ||||
|             except AssertionError as e: | ||||
|                 logger.error("Error after {:d} steps (t={:f}: {:s}".format(sim.simSteps(), sim.simTime(), str(e))) | ||||
|                 return outdata.filename | ||||
| 
 | ||||
|             profiling_data_sim_runner["start"]["t_full_step"] += time.time() | ||||
| 
 | ||||
|             #Simulate | ||||
|             if (t_step > 0.0): | ||||
|                 sim.simulate(t_step, dt) | ||||
| 
 | ||||
|             profiling_data_sim_runner["end"]["t_full_step"] += time.time() | ||||
| 
 | ||||
|             profiling_data_sim_runner["start"]["t_nc_write"] += time.time() | ||||
| 
 | ||||
|             #Download | ||||
|             save_vars = sim.download(download_vars) | ||||
|              | ||||
|             #Save to file | ||||
|             for i, var_name in enumerate(save_var_names): | ||||
|                 ncvars[var_name][k, :] = save_vars[i] | ||||
| 
 | ||||
|             profiling_data_sim_runner["end"]["t_nc_write"] += time.time() | ||||
| 
 | ||||
|             #Write progress to screen | ||||
|             print_string = progress_printer.getPrintString(t_end) | ||||
|             if (print_string): | ||||
|                 logger.debug(print_string) | ||||
|                  | ||||
|         logger.debug("Simulated to t={:f} in {:d} timesteps (average dt={:f})".format(t_end, sim.simSteps(), sim.simTime() / sim.simSteps())) | ||||
| 
 | ||||
|     return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi | ||||
| 
 | ||||
| 
 | ||||
| class Timer(object): | ||||
|     """ | ||||
|     Class which keeps track of time spent for a section of code | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, tag, log_level=logging.DEBUG): | ||||
|         self.tag = tag | ||||
|         self.log_level = log_level | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|          | ||||
|     def __enter__(self): | ||||
|         self.start = time.time() | ||||
|         return self | ||||
|      | ||||
|     def __exit__(self, *args): | ||||
|         self.end = time.time() | ||||
|         self.secs = self.end - self.start | ||||
|         self.msecs = self.secs * 1000 # millisecs | ||||
|         self.logger.log(self.log_level, "%s: %f ms", self.tag, self.msecs) | ||||
| 
 | ||||
|     def elapsed(self): | ||||
|         return time.time() - self.start | ||||
| 
 | ||||
| 
 | ||||
| class PopenFileBuffer(object): | ||||
|     """ | ||||
|     Simple class for holding a set of tempfiles | ||||
|     for communicating with a subprocess | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self): | ||||
|         self.stdout = tempfile.TemporaryFile(mode='w+t') | ||||
|         self.stderr = tempfile.TemporaryFile(mode='w+t') | ||||
| 
 | ||||
|     def __del__(self): | ||||
|         self.stdout.close() | ||||
|         self.stderr.close() | ||||
| 
 | ||||
|     def read(self): | ||||
|         self.stdout.seek(0) | ||||
|         cout = self.stdout.read() | ||||
|         self.stdout.seek(0, 2) | ||||
| 
 | ||||
|         self.stderr.seek(0) | ||||
|         cerr = self.stderr.read() | ||||
|         self.stderr.seek(0, 2) | ||||
| 
 | ||||
|         return cout, cerr | ||||
| 
 | ||||
| 
 | ||||
| class IPEngine(object): | ||||
|     """ | ||||
|     Class for starting IPEngines for MPI processing in IPython | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, n_engines): | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|          | ||||
|         #Start ipcontroller | ||||
|         self.logger.info("Starting IPController") | ||||
|         self.c_buff = PopenFileBuffer() | ||||
|         c_cmd = ["ipcontroller",  "--ip='*'"] | ||||
|         c_params = dict() | ||||
|         c_params['stderr'] = self.c_buff.stderr | ||||
|         c_params['stdout'] = self.c_buff.stdout | ||||
|         c_params['shell'] = False | ||||
|         if os.name == 'nt': | ||||
|             c_params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP | ||||
|         self.c = subprocess.Popen(c_cmd, **c_params) | ||||
|          | ||||
|         #Wait until controller is running | ||||
|         time.sleep(3) | ||||
|          | ||||
|         #Start engines | ||||
|         self.logger.info("Starting IPEngines") | ||||
|         self.e_buff = PopenFileBuffer() | ||||
|         e_cmd = ["mpiexec", "-n", str(n_engines), "ipengine", "--mpi"] | ||||
|         e_params = dict() | ||||
|         e_params['stderr'] = self.e_buff.stderr | ||||
|         e_params['stdout'] = self.e_buff.stdout | ||||
|         e_params['shell'] = False | ||||
|         if os.name == 'nt': | ||||
|             e_params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP | ||||
|         self.e = subprocess.Popen(e_cmd, **e_params) | ||||
| 
 | ||||
|         # attach to a running cluster | ||||
|         import ipyparallel | ||||
|         self.cluster = ipyparallel.Client()#profile='mpi') | ||||
|         time.sleep(3) | ||||
|         while(len(self.cluster.ids) != n_engines): | ||||
|             time.sleep(0.5) | ||||
|             self.logger.info("Waiting for cluster...") | ||||
|             self.cluster = ipyparallel.Client()#profile='mpi') | ||||
|          | ||||
|         self.logger.info("Done") | ||||
|          | ||||
|     def __del__(self): | ||||
|         self.shutdown() | ||||
|      | ||||
|     def shutdown(self): | ||||
|         if (self.e is not None): | ||||
|             if (os.name == 'nt'): | ||||
|                 self.logger.warn("Sending CTRL+C to IPEngine") | ||||
|                 self.e.send_signal(signal.CTRL_C_EVENT) | ||||
|                  | ||||
|             try: | ||||
|                 self.e.communicate(timeout=3) | ||||
|                 self.e.kill() | ||||
|             except subprocess.TimeoutExpired: | ||||
|                 self.logger.warn("Killing IPEngine") | ||||
|                 self.e.kill() | ||||
|                 self.e.communicate() | ||||
|             self.e = None | ||||
|                  | ||||
|             cout, cerr = self.e_buff.read() | ||||
|             self.logger.info("IPEngine cout: {:s}".format(cout)) | ||||
|             self.logger.info("IPEngine cerr: {:s}".format(cerr)) | ||||
|             self.e_buff = None | ||||
|              | ||||
|             gc.collect() | ||||
|              | ||||
|         if (self.c is not None): | ||||
|             if (os.name == 'nt'): | ||||
|                 self.logger.warn("Sending CTRL+C to IPController") | ||||
|                 self.c.send_signal(signal.CTRL_C_EVENT) | ||||
|                  | ||||
|             try: | ||||
|                 self.c.communicate(timeout=3) | ||||
|                 self.c.kill() | ||||
|             except subprocess.TimeoutExpired: | ||||
|                 self.logger.warn("Killing IPController") | ||||
|                 self.c.kill() | ||||
|                 self.c.communicate() | ||||
|             self.c = None | ||||
|                  | ||||
|             cout, cerr = self.c_buff.read() | ||||
|             self.logger.info("IPController cout: {:s}".format(cout)) | ||||
|             self.logger.info("IPController cerr: {:s}".format(cerr)) | ||||
|             self.c_buff = None | ||||
|          | ||||
|             gc.collect() | ||||
| 
 | ||||
| 
 | ||||
| class DataDumper(object): | ||||
|     """ | ||||
|     Simple class for holding a netCDF4 object | ||||
|     (handles opening and closing in a nice way) | ||||
|     Use as  | ||||
|     with DataDumper("filename") as data: | ||||
|         ... | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, filename, *args, **kwargs): | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|          | ||||
|         #Create directory if needed | ||||
|         filename = os.path.abspath(filename) | ||||
|         dirname = os.path.dirname(filename) | ||||
|         if dirname and not os.path.isdir(dirname): | ||||
|             self.logger.info("Creating directory " + dirname) | ||||
|             os.makedirs(dirname) | ||||
|          | ||||
|         #Get mode of file if we have that | ||||
|         mode = None | ||||
|         if (args): | ||||
|             mode = args[0] | ||||
|         elif (kwargs and 'mode' in kwargs.keys()): | ||||
|             mode = kwargs['mode'] | ||||
|              | ||||
|         #Create new unique file if writing | ||||
|         if (mode): | ||||
|             if (("w" in mode) or ("+" in mode) or ("a" in mode)): | ||||
|                 i = 0 | ||||
|                 stem, ext = os.path.splitext(filename) | ||||
|                 while (os.path.isfile(filename)): | ||||
|                     filename = "{:s}_{:04d}{:s}".format(stem, i, ext) | ||||
|                     i = i+1 | ||||
|         self.filename = os.path.abspath(filename) | ||||
|          | ||||
|         #Save arguments | ||||
|         self.args = args | ||||
|         self.kwargs = kwargs | ||||
|                  | ||||
|         #Log output | ||||
|         self.logger.info("Initialized " + self.filename) | ||||
|          | ||||
|     def __enter__(self): | ||||
|         self.logger.info("Opening " + self.filename) | ||||
|         if (self.args): | ||||
|             self.logger.info("Arguments: " + str(self.args)) | ||||
|         if (self.kwargs): | ||||
|             self.logger.info("Keyword arguments: " + str(self.kwargs)) | ||||
|         self.ncfile = netCDF4.Dataset(self.filename, *self.args, **self.kwargs) | ||||
|         return self | ||||
|          | ||||
|     def __exit__(self, *args): | ||||
|         self.logger.info("Closing " + self.filename) | ||||
|         self.ncfile.close() | ||||
|          | ||||
|     def toJson(in_dict): | ||||
|         out_dict = in_dict.copy() | ||||
| 
 | ||||
|         for key in out_dict: | ||||
|             if isinstance(out_dict[key], np.ndarray): | ||||
|                 out_dict[key] = out_dict[key].tolist() | ||||
|             else: | ||||
|                 try: | ||||
|                     json.dumps(out_dict[key]) | ||||
|                 except (TypeError, ValueError, OverflowError): | ||||
|                     out_dict[key] = str(out_dict[key]) | ||||
| 
 | ||||
|         return json.dumps(out_dict) | ||||
| 
 | ||||
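| # Illustrative sketch (not part of the original module): minimal use of | ||||
| # DataDumper as a context manager. The file name "example.nc" and the | ||||
| # dimension/variable names below are hypothetical. | ||||
| def _example_datadumper_usage(): | ||||
|     with DataDumper("example.nc", mode="w") as data: | ||||
|         # data.ncfile is the underlying netCDF4.Dataset | ||||
|         data.ncfile.createDimension("x", 16) | ||||
|         var = data.ncfile.createVariable("eta", np.float32, ("x",)) | ||||
|         var[:] = np.zeros(16, dtype=np.float32) | ||||
|     # Simulator arguments can be serialized alongside the data | ||||
|     return DataDumper.toJson({"nx": 16, "dx": 100.0, "note": np.float32(0.5)}) | ||||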
| 
 | ||||
| class ProgressPrinter(object): | ||||
|     """ | ||||
|     Small helper class for printing and logging simulation progress | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, total_steps, print_every=5): | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|         self.start = time.time() | ||||
|         self.total_steps = total_steps | ||||
|         self.print_every = print_every | ||||
|         self.next_print_time = self.print_every | ||||
|         self.last_step = 0 | ||||
|         self.secs_per_iter = None | ||||
|          | ||||
|     def getPrintString(self, step): | ||||
|         elapsed =  time.time() - self.start | ||||
|         if (elapsed > self.next_print_time):             | ||||
|             dt = elapsed - (self.next_print_time - self.print_every) | ||||
|             dsteps = step - self.last_step | ||||
|             steps_remaining = self.total_steps - step | ||||
|                          | ||||
|             if (dsteps == 0): | ||||
|                 return | ||||
|                  | ||||
|             self.last_step = step | ||||
|             self.next_print_time = elapsed + self.print_every | ||||
|              | ||||
|             if not self.secs_per_iter: | ||||
|                 self.secs_per_iter = dt / dsteps | ||||
|             self.secs_per_iter = 0.2*self.secs_per_iter + 0.8*(dt / dsteps) | ||||
|              | ||||
|             remaining_time = steps_remaining * self.secs_per_iter | ||||
| 
 | ||||
|             return "{:s}. Total: {:s}, elapsed: {:s}, remaining: {:s}".format( | ||||
|                 ProgressPrinter.progressBar(step, self.total_steps),  | ||||
|                 ProgressPrinter.timeString(elapsed + remaining_time),  | ||||
|                 ProgressPrinter.timeString(elapsed),  | ||||
|                 ProgressPrinter.timeString(remaining_time)) | ||||
| 
 | ||||
|     def timeString(seconds): | ||||
|         seconds = int(max(seconds, 1)) | ||||
|         minutes, seconds = divmod(seconds, 60) | ||||
|         hours, minutes = divmod(minutes, 60) | ||||
|         periods = [('h', hours), ('m', minutes), ('s', seconds)] | ||||
|         time_string = ' '.join('{}{}'.format(value, name) | ||||
|                                 for name, value in periods | ||||
|                                 if value) | ||||
|         return time_string | ||||
| 
 | ||||
|     def progressBar(step, total_steps, width=30): | ||||
|         progress = np.round(width * step / total_steps).astype(np.int32) | ||||
|         progressbar = "0% [" + "#"*(progress) + "="*(width-progress) + "] 100%" | ||||
|         return progressbar | ||||
| 
 | ||||
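| # Illustrative sketch (not part of the original module): typical use of | ||||
| # ProgressPrinter inside a time-stepping loop. The per-step work is elided; | ||||
| # getPrintString returns None between print intervals. | ||||
| def _example_progress_usage(total_steps=1000): | ||||
|     progress = ProgressPrinter(total_steps, print_every=5) | ||||
|     for step in range(total_steps): | ||||
|         # ... advance the simulation one step here ... | ||||
|         msg = progress.getPrintString(step) | ||||
|         if msg: | ||||
|             progress.logger.info(msg) | ||||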
| 
 | ||||
| class CudaArray2D: | ||||
|     """ | ||||
|     Class that holds 2D data  | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32): | ||||
|         """ | ||||
|         Uploads initial data to the CUDA device | ||||
|         """ | ||||
| 
 | ||||
|         self.logger =  logging.getLogger(__name__) | ||||
|         self.nx = nx | ||||
|         self.ny = ny | ||||
|         self.x_halo = x_halo | ||||
|         self.y_halo = y_halo | ||||
|          | ||||
|         nx_halo = nx + 2*x_halo | ||||
|         ny_halo = ny + 2*y_halo | ||||
|          | ||||
|         #self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny) | ||||
|         #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here | ||||
|         self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype) | ||||
|          | ||||
|         #For returning to download | ||||
|         self.memorypool = PageLockedMemoryPool() | ||||
|          | ||||
|         #If we don't have any data, just allocate and return | ||||
|         if cpu_data is None: | ||||
|             return | ||||
|              | ||||
|         #Make sure data is in proper format | ||||
|         assert cpu_data.shape == (ny_halo, nx_halo) or cpu_data.shape == (self.ny, self.nx), "Wrong shape of data %s vs %s / %s" % (str(cpu_data.shape), str((self.ny, self.nx)), str((ny_halo, nx_halo))) | ||||
|         assert cpu_data.itemsize == 4, "Wrong size of data type" | ||||
|         assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)" | ||||
| 
 | ||||
|         #Create copy object from host to device | ||||
|         x = (nx_halo - cpu_data.shape[1]) // 2 | ||||
|         y = (ny_halo - cpu_data.shape[0]) // 2 | ||||
|         self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]]) | ||||
|         #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny) | ||||
|          | ||||
|     def __del__(self, *args): | ||||
|         #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny) | ||||
|         self.data.gpudata.free() | ||||
|         self.data = None | ||||
| 
 | ||||
|     def download(self, stream, cpu_data=None, asynch=False, extent=None): | ||||
|         """ | ||||
|         Enables downloading data from GPU to Python | ||||
|         """ | ||||
| 
 | ||||
|         if (extent is None): | ||||
|             x = self.x_halo | ||||
|             y = self.y_halo | ||||
|             nx = self.nx | ||||
|             ny = self.ny | ||||
|         else: | ||||
|             x, y, nx, ny = extent | ||||
|              | ||||
|         if (cpu_data is None): | ||||
|             #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny) | ||||
|             #Allocate host memory | ||||
|             #The following fails, don't know why (crashes python) | ||||
|             cpu_data = cuda.pagelocked_empty((int(ny), int(nx)), dtype=np.float32, mem_flags=cuda.host_alloc_flags.PORTABLE) | ||||
|             #Non-pagelocked: cpu_data = np.empty((ny, nx), dtype=np.float32) | ||||
|             #cpu_data = self.memorypool.allocate((ny, nx), dtype=np.float32) | ||||
|              | ||||
|         assert nx == cpu_data.shape[1] | ||||
|         assert ny == cpu_data.shape[0] | ||||
|         assert x+nx <= self.nx + 2*self.x_halo | ||||
|         assert y+ny <= self.ny + 2*self.y_halo | ||||
|          | ||||
|         #Create copy object from device to host | ||||
|         copy = cuda.Memcpy2D() | ||||
|         copy.set_src_device(self.data.gpudata) | ||||
|         copy.set_dst_host(cpu_data) | ||||
|          | ||||
|         #Set offsets and pitch of source | ||||
|         copy.src_x_in_bytes = int(x)*self.data.strides[1] | ||||
|         copy.src_y = int(y) | ||||
|         copy.src_pitch = self.data.strides[0] | ||||
|          | ||||
|         #Set width in bytes to copy for each row and | ||||
|         #number of rows to copy | ||||
|         copy.width_in_bytes = int(nx)*cpu_data.itemsize | ||||
|         copy.height = int(ny) | ||||
|          | ||||
|         copy(stream) | ||||
|         if not asynch: | ||||
|             stream.synchronize() | ||||
|          | ||||
|         return cpu_data | ||||
|          | ||||
|     def upload(self, stream, cpu_data, extent=None): | ||||
|         if (extent is None): | ||||
|             x = self.x_halo | ||||
|             y = self.y_halo | ||||
|             nx = self.nx | ||||
|             ny = self.ny | ||||
|         else: | ||||
|             x, y, nx, ny = extent | ||||
|              | ||||
|         assert(nx == cpu_data.shape[1]) | ||||
|         assert(ny == cpu_data.shape[0]) | ||||
|         assert(x+nx <= self.nx + 2*self.x_halo) | ||||
|         assert(y+ny <= self.ny + 2*self.y_halo) | ||||
|           | ||||
|         #Create copy object from host to device | ||||
|         copy = cuda.Memcpy2D() | ||||
|         copy.set_dst_device(self.data.gpudata) | ||||
|         copy.set_src_host(cpu_data) | ||||
|          | ||||
|         #Set offsets and pitch of source | ||||
|         copy.dst_x_in_bytes = int(x)*self.data.strides[1] | ||||
|         copy.dst_y = int(y) | ||||
|         copy.dst_pitch = self.data.strides[0] | ||||
|          | ||||
|         #Set width in bytes to copy for each row and | ||||
|         #number of rows to copy | ||||
|         copy.width_in_bytes = int(nx)*cpu_data.itemsize | ||||
|         copy.height = int(ny) | ||||
|          | ||||
|         copy(stream) | ||||
| 
 | ||||
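| # Illustrative sketch (not part of the original module): round-tripping a | ||||
| # host array through CudaArray2D. Assumes an initialized CUDA context and a | ||||
| # pycuda stream created elsewhere in the framework. | ||||
| def _example_cuda_array_2d(stream): | ||||
|     nx, ny, halo = 64, 32, 2 | ||||
|     cpu_data = np.ones((ny, nx), dtype=np.float32) | ||||
|     arr = CudaArray2D(stream, nx, ny, halo, halo, cpu_data=cpu_data) | ||||
|     # Default extent excludes the halo, so we get the interior back | ||||
|     result = arr.download(stream, asynch=False) | ||||
|     assert result.shape == (ny, nx) | ||||
|     return result | ||||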
| 
 | ||||
| class CudaArray3D: | ||||
|     """ | ||||
|     Class that holds 3D data  | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, stream, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None, dtype=np.float32): | ||||
|         """ | ||||
|         Uploads initial data to the CUDA device | ||||
|         """ | ||||
| 
 | ||||
|         self.logger =  logging.getLogger(__name__) | ||||
|         self.nx = nx | ||||
|         self.ny = ny | ||||
|         self.nz = nz | ||||
|         self.x_halo = x_halo | ||||
|         self.y_halo = y_halo | ||||
|         self.z_halo = z_halo | ||||
|          | ||||
|         nx_halo = nx + 2*x_halo | ||||
|         ny_halo = ny + 2*y_halo | ||||
|         nz_halo = nz + 2*z_halo | ||||
|          | ||||
|         #self.logger.debug("Allocating [%dx%dx%d] buffer", self.nx, self.ny, self.nz) | ||||
|         #Should perhaps use pycuda.driver.mem_alloc_data.pitch() here | ||||
|         self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype) | ||||
|          | ||||
|         #For returning to download | ||||
|         self.memorypool = PageLockedMemoryPool() | ||||
|          | ||||
|         #If we don't have any data, just allocate and return | ||||
|         if cpu_data is None: | ||||
|             return | ||||
|              | ||||
|         #Make sure data is in proper format | ||||
|         assert cpu_data.shape == (nz_halo, ny_halo, nx_halo) or cpu_data.shape == (self.nz, self.ny, self.nx), "Wrong shape of data %s vs %s / %s" % (str(cpu_data.shape), str((self.nz, self.ny, self.nx)), str((nz_halo, ny_halo, nx_halo))) | ||||
|         assert cpu_data.itemsize == 4, "Wrong size of data type" | ||||
|         assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)" | ||||
|              | ||||
|         #Create copy object from host to device | ||||
|         copy = cuda.Memcpy3D() | ||||
|         copy.set_src_host(cpu_data) | ||||
|         copy.set_dst_device(self.data.gpudata) | ||||
|          | ||||
|         #Set offsets of destination | ||||
|         x_offset = (nx_halo - cpu_data.shape[2]) // 2 | ||||
|         y_offset = (ny_halo - cpu_data.shape[1]) // 2 | ||||
|         z_offset = (nz_halo - cpu_data.shape[0]) // 2 | ||||
|         copy.dst_x_in_bytes = x_offset*cpu_data.itemsize | ||||
|         copy.dst_y = y_offset | ||||
|         copy.dst_z = z_offset | ||||
|          | ||||
|         #Set pitch and height of source and destination | ||||
|         #(row length in bytes and number of rows per slice) | ||||
|         copy.src_pitch = cpu_data.strides[1] | ||||
|         copy.src_height = cpu_data.shape[1] | ||||
|         copy.dst_pitch = self.data.strides[1] | ||||
|         copy.dst_height = ny_halo | ||||
|          | ||||
|         #Set width in bytes to copy for each row, | ||||
|         #the number of rows and the number of slices to copy | ||||
|         width = max(self.nx, cpu_data.shape[2]) | ||||
|         height = max(self.ny, cpu_data.shape[1]) | ||||
|         depth = max(self.nz, cpu_data.shape[0]) | ||||
|         copy.width_in_bytes = width*cpu_data.itemsize | ||||
|         copy.height = height | ||||
|         copy.depth = depth | ||||
|          | ||||
|         #Perform the copy | ||||
|         copy(stream) | ||||
|          | ||||
|         #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny) | ||||
|          | ||||
|     def __del__(self, *args): | ||||
|         #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny) | ||||
|         self.data.gpudata.free() | ||||
|         self.data = None | ||||
|          | ||||
|     def download(self, stream, asynch=False): | ||||
|         """ | ||||
|         Enables downloading data from GPU to Python | ||||
|         """ | ||||
| 
 | ||||
|         #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny) | ||||
|         #Allocate host memory | ||||
|         #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32) | ||||
|         #cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32) | ||||
|         cpu_data = self.memorypool.allocate((self.nz, self.ny, self.nx), dtype=np.float32) | ||||
|          | ||||
|         #Create copy object from device to host | ||||
|         #Note: a 3D copy is needed here (Memcpy2D has no src_z/depth members) | ||||
|         copy = cuda.Memcpy3D() | ||||
|         copy.set_src_device(self.data.gpudata) | ||||
|         copy.set_dst_host(cpu_data) | ||||
|          | ||||
|         #Set offsets, pitch and height of the pitched source | ||||
|         copy.src_x_in_bytes = self.x_halo*cpu_data.itemsize | ||||
|         copy.src_y = self.y_halo | ||||
|         copy.src_z = self.z_halo | ||||
|         copy.src_pitch = self.data.strides[1] | ||||
|         copy.src_height = self.ny + 2*self.y_halo | ||||
|          | ||||
|         #Destination is a contiguous (nz, ny, nx) host array | ||||
|         copy.dst_pitch = self.nx*cpu_data.itemsize | ||||
|         copy.dst_height = self.ny | ||||
|          | ||||
|         #Set width in bytes to copy for each row, | ||||
|         #the number of rows and the number of slices to copy | ||||
|         copy.width_in_bytes = self.nx*cpu_data.itemsize | ||||
|         copy.height = self.ny | ||||
|         copy.depth = self.nz | ||||
|          | ||||
|         copy(stream) | ||||
|         if not asynch: | ||||
|             stream.synchronize() | ||||
|          | ||||
|         return cpu_data | ||||
| 
 | ||||
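| # Illustrative sketch (not part of the original module): allocating a | ||||
| # zero-initialized 3D buffer and reading back its interior. Assumes a valid | ||||
| # CUDA context and pycuda stream. | ||||
| def _example_cuda_array_3d(stream): | ||||
|     nx, ny, nz, halo = 32, 16, 8, 1 | ||||
|     arr = CudaArray3D(stream, nx, ny, nz, halo, halo, halo) | ||||
|     interior = arr.download(stream)  # shape (nz, ny, nx), halo excluded | ||||
|     assert interior.shape == (nz, ny, nx) | ||||
|     return interior | ||||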
| 
 | ||||
| class ArakawaA2D: | ||||
|     """ | ||||
|     A class representing an Arakawa A type (unstaggered, logically Cartesian) grid | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables): | ||||
|         """ | ||||
|         Uploads initial data to the GPU device | ||||
|         """ | ||||
|         self.logger =  logging.getLogger(__name__) | ||||
|         self.gpu_variables = [] | ||||
|         for cpu_variable in cpu_variables: | ||||
|             self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)] | ||||
|          | ||||
|     def __getitem__(self, key): | ||||
|         assert type(key) == int, "Indexing is int based" | ||||
|         if (key >= len(self.gpu_variables) or key < 0): | ||||
|             raise IndexError("Out of bounds") | ||||
|         return self.gpu_variables[key] | ||||
|      | ||||
|     def download(self, stream, variables=None): | ||||
|         """ | ||||
|         Enables downloading data from the GPU device to Python | ||||
|         """ | ||||
|         if variables is None: | ||||
|             variables=range(len(self.gpu_variables)) | ||||
|          | ||||
|         cpu_variables = [] | ||||
|         for i in variables: | ||||
|             assert i < len(self.gpu_variables), "Variable {:d} is out of range".format(i) | ||||
|             cpu_variables += [self.gpu_variables[i].download(stream, asynch=True)] | ||||
| 
 | ||||
|         #stream.synchronize() | ||||
|         return cpu_variables | ||||
|          | ||||
|     def check(self): | ||||
|         """ | ||||
|         Checks that data is still sane | ||||
|         """ | ||||
|         for i, gpu_variable in enumerate(self.gpu_variables): | ||||
|             var_sum = pycuda.gpuarray.sum(gpu_variable.data).get() | ||||
|             self.logger.debug("Data %d with size [%d x %d] has average %f", i, gpu_variable.nx, gpu_variable.ny, var_sum / (gpu_variable.nx * gpu_variable.ny)) | ||||
|             assert not np.isnan(var_sum), "Data contains NaN values!" | ||||
|      | ||||
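| # Illustrative sketch (not part of the original module): grouping conserved | ||||
| # variables on an Arakawa A grid and downloading a subset. Assumes a valid | ||||
| # CUDA context/stream; h, hu, hv are hypothetical (ny, nx) float32 arrays. | ||||
| def _example_arakawa_a2d(stream, h, hu, hv): | ||||
|     ny, nx = h.shape | ||||
|     u = ArakawaA2D(stream, nx, ny, 1, 1, [h, hu, hv]) | ||||
|     u.check()  # asserts that no variable contains NaN | ||||
|     h_out, hu_out = u.download(stream, variables=[0, 1]) | ||||
|     stream.synchronize()  # download() issues asynchronous copies | ||||
|     return h_out, hu_out | ||||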
| @ -19,29 +19,29 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| 
 | ||||
| class EE2D_KP07_dimsplit (BaseSimulator): | ||||
| 
 | ||||
| class EE2D_KP07_dimsplit(BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the 2D Euler equations using the dimensionally split Kurganov-Petrova 2007 (KP07) scheme | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self,  | ||||
|                  context,  | ||||
|                  rho, rho_u, rho_v, E,  | ||||
|                  nx, ny,  | ||||
|                  dx, dy,   | ||||
|                  g,  | ||||
|                  gamma,  | ||||
|                  theta=1.3,  | ||||
|     def __init__(self, | ||||
|                  context, | ||||
|                  rho, rho_u, rho_v, E, | ||||
|                  nx, ny, | ||||
|                  dx, dy, | ||||
|                  g, | ||||
|                  gamma, | ||||
|                  theta=1.3, | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=8): | ||||
|         """ | ||||
|         Initialization routine | ||||
| @ -60,77 +60,76 @@ class EE2D_KP07_dimsplit (BaseSimulator): | ||||
|             gamma: Ratio of specific heats (adiabatic index) | ||||
|             p: pressure | ||||
|         """ | ||||
|                      | ||||
| 
 | ||||
|         # Call super constructor | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy,  | ||||
|             boundary_conditions, | ||||
|             cfl_scale,  | ||||
|             2,  | ||||
|             block_width, block_height) | ||||
|         super().__init__(context, | ||||
|                          nx, ny, | ||||
|                          dx, dy, | ||||
|                          boundary_conditions, | ||||
|                          cfl_scale, | ||||
|                          2, | ||||
|                          block_width, block_height) | ||||
|         self.g = np.float32(g) | ||||
|         self.gamma = np.float32(gamma) | ||||
|         self.theta = np.float32(theta)  | ||||
|         self.theta = np.float32(theta) | ||||
| 
 | ||||
|         #Get kernels | ||||
|         module = context.get_module("cuda/EE2D_KP07_dimsplit.cu",  | ||||
|                                         defines={ | ||||
|                                             'BLOCK_WIDTH': self.block_size[0],  | ||||
|                                             'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                         },  | ||||
|                                         compile_args={ | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"],  | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         # Get kernels | ||||
|         module = context.get_module("cuda/EE2D_KP07_dimsplit.cu", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"], | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07DimsplitKernel") | ||||
|         self.kernel.prepare("iiffffffiiPiPiPiPiPiPiPiPiPiiii") | ||||
|          | ||||
|          | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [rho, rho_u, rho_v, E]) | ||||
|         self.u1 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [None, None, None, None]) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [rho, rho_u, rho_v, E]) | ||||
|         self.u1 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [None, None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         dt_x = np.min(self.dx / (np.abs(rho_u/rho) + np.sqrt(gamma*rho))) | ||||
|         dt_y = np.min(self.dy / (np.abs(rho_v/rho) + np.sqrt(gamma*rho))) | ||||
|         dt_x = np.min(self.dx / (np.abs(rho_u / rho) + np.sqrt(gamma * rho))) | ||||
|         dt_y = np.min(self.dy / (np.abs(rho_v / rho) + np.sqrt(gamma * rho))) | ||||
|         self.dt = min(dt_x, dt_y) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|      | ||||
|     def substep(self, dt, step_number, external=True, internal=True): | ||||
|             self.substepDimsplit(0.5*dt, step_number, external, internal) | ||||
|      | ||||
|     def substepDimsplit(self, dt, substep, external, internal): | ||||
|         if external and internal: | ||||
|             #print("COMPLETE DOMAIN (dt=" + str(dt) + ")") | ||||
| 
 | ||||
|             self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.gamma,  | ||||
|                 self.theta,  | ||||
|                 substep, | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u0[3].data.gpudata, self.u0[3].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0],  | ||||
|                 self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0,  | ||||
|                 self.nx, self.ny) | ||||
|     def substep(self, dt, step_number, external=True, internal=True): | ||||
|         self.substep_dimsplit(0.5 * dt, step_number, external, internal) | ||||
| 
 | ||||
|     def substep_dimsplit(self, dt, substep, external, internal): | ||||
|         if external and internal: | ||||
|             # print("COMPLETE DOMAIN (dt=" + str(dt) + ")") | ||||
| 
 | ||||
|             self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             0, 0, | ||||
|                                             self.nx, self.ny) | ||||
|             return | ||||
|          | ||||
| 
 | ||||
|         if external and not internal: | ||||
|             ################################### | ||||
|             # XXX: Corners are treated twice! # | ||||
| @ -141,136 +140,135 @@ class EE2D_KP07_dimsplit (BaseSimulator): | ||||
|             # NORTH | ||||
|             # (x0, y0) x (x1, y1) | ||||
|             #  (0, ny-y_halo) x (nx, ny) | ||||
|             self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny, | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.gamma,  | ||||
|                 self.theta,  | ||||
|                 substep, | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u0[3].data.gpudata, self.u0[3].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0],  | ||||
|                 self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, self.ny - int(self.u0[0].y_halo), | ||||
|                 self.nx, self.ny) | ||||
|             self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             0, self.ny - int(self.u0[0].y_halo), | ||||
|                                             self.nx, self.ny) | ||||
| 
 | ||||
|             # SOUTH | ||||
|             # (x0, y0) x (x1, y1) | ||||
|             #   (0, 0) x (nx, y_halo) | ||||
|             self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny, | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.gamma,  | ||||
|                 self.theta,  | ||||
|                 substep, | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u0[3].data.gpudata, self.u0[3].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0],  | ||||
|                 self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 self.nx, int(self.u0[0].y_halo)) | ||||
|              | ||||
|             self.kernel.prepared_async_call(ns_grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             0, 0, | ||||
|                                             self.nx, int(self.u0[0].y_halo)) | ||||
| 
 | ||||
|             we_grid_size = (1, self.grid_size[1]) | ||||
|              | ||||
| 
 | ||||
|             # WEST | ||||
|             # (x0, y0) x (x1, y1) | ||||
|             #  (0, 0) x (x_halo, ny) | ||||
|             self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny, | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.gamma,  | ||||
|                 self.theta,  | ||||
|                 substep, | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u0[3].data.gpudata, self.u0[3].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0],  | ||||
|                 self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 int(self.u0[0].x_halo), self.ny) | ||||
|             self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             0, 0, | ||||
|                                             int(self.u0[0].x_halo), self.ny) | ||||
| 
 | ||||
|             # EAST | ||||
|             # (x0, y0) x (x1, y1) | ||||
|             #   (nx-x_halo, 0) x (nx, ny) | ||||
|             self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny, | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.gamma,  | ||||
|                 self.theta,  | ||||
|                 substep, | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u0[3].data.gpudata, self.u0[3].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0],  | ||||
|                 self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 self.nx - int(self.u0[0].x_halo), 0, | ||||
|                 self.nx, self.ny) | ||||
|             self.kernel.prepared_async_call(we_grid_size, self.block_size, self.stream, | ||||
|                                             self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             self.nx - int(self.u0[0].x_halo), 0, | ||||
|                                             self.nx, self.ny) | ||||
|             return | ||||
| 
 | ||||
|         if internal and not external: | ||||
|              | ||||
|             # INTERNAL DOMAIN | ||||
|             #         (x0, y0) x (x1, y1) | ||||
|             # (x_halo, y_halo) x (nx - x_halo, ny - y_halo) | ||||
|             self.kernel.prepared_async_call(self.grid_size, self.block_size, self.internal_stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.gamma,  | ||||
|                 self.theta,  | ||||
|                 substep, | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u0[3].data.gpudata, self.u0[3].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0],  | ||||
|                 self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 int(self.u0[0].x_halo), int(self.u0[0].y_halo), | ||||
|                 self.nx - int(self.u0[0].x_halo), self.ny - int(self.u0[0].y_halo)) | ||||
|             self.kernel.prepared_async_call(self.grid_size, self.block_size, self.internal_stream, | ||||
|                                             self.nx, self.ny, | ||||
|                                             self.dx, self.dy, dt, | ||||
|                                             self.g, | ||||
|                                             self.gamma, | ||||
|                                             self.theta, | ||||
|                                             substep, | ||||
|                                             self.boundary_conditions, | ||||
|                                             self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                             self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                             self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                             self.u0[3].data.gpudata, self.u0[3].data.strides[0], | ||||
|                                             self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                             self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                             self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                             self.u1[3].data.gpudata, self.u1[3].data.strides[0], | ||||
|                                             self.cfl_data.gpudata, | ||||
|                                             int(self.u0[0].x_halo), int(self.u0[0].y_halo), | ||||
|                                             self.nx - int(self.u0[0].x_halo), self.ny - int(self.u0[0].y_halo)) | ||||
|             return | ||||
| 
 | ||||
|     def swapBuffers(self): | ||||
|     def swap_buffers(self): | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|         return | ||||
|          | ||||
|     def getOutput(self): | ||||
| 
 | ||||
|     def get_output(self): | ||||
|         return self.u0 | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|         return | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5 | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
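| # Illustrative sketch (not part of the original commit): constructing and | ||||
| # running the Euler simulator. "context" is assumed to be this framework's | ||||
| # CudaContext; rho, rho_u, rho_v, E are hypothetical (ny, nx) float32 arrays | ||||
| # and gamma=1.4 is the usual ratio of specific heats for air. The simulate() | ||||
| # call is assumed to be provided by BaseSimulator. | ||||
| # | ||||
| #   sim = EE2D_KP07_dimsplit(context, rho, rho_u, rho_v, E, | ||||
| #                            nx, ny, dx, dy, g=0.0, gamma=1.4) | ||||
| #   sim.simulate(t_end) | ||||
| #   rho, rho_u, rho_v, E = sim.get_output().download(sim.stream) | ||||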
| @ -20,30 +20,31 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators import Simulator | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| class FORCE (Simulator.BaseSimulator): | ||||
| 
 | ||||
| class FORCE(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the first-order centred (FORCE) scheme | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self,  | ||||
|                  context,  | ||||
|                  h0, hu0, hv0,  | ||||
|                  nx, ny,  | ||||
|                  dx, dy,  | ||||
|                  g,  | ||||
|     def __init__(self, | ||||
|                  context, | ||||
|                  h0, hu0, hv0, | ||||
|                  nx, ny, | ||||
|                  dx, dy, | ||||
|                  g, | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=16, | ||||
|                  dt: float=None, | ||||
|                  compile_opts: list[str]=[]): | ||||
|                  dt: float = None, | ||||
|                  compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
|          | ||||
| @ -59,76 +60,76 @@ class FORCE (Simulator.BaseSimulator): | ||||
|             g: Gravitational acceleration (9.81 m/s^2) | ||||
|             compile_opts: Pass a list of nvcc compiler options | ||||
|         """ | ||||
|                   | ||||
|         # Call super constructor | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy,  | ||||
|             boundary_conditions, | ||||
|             cfl_scale, | ||||
|             1, | ||||
|             block_width, block_height) | ||||
|         self.g = np.float32(g)  | ||||
| 
 | ||||
|         #Get kernels | ||||
|         # Call super constructor | ||||
|         super().__init__(context, | ||||
|                          nx, ny, | ||||
|                          dx, dy, | ||||
|                          boundary_conditions, | ||||
|                          cfl_scale, | ||||
|                          1, | ||||
|                          block_width, block_height) | ||||
|         self.g = np.float32(g) | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_FORCE.cu", | ||||
|                                         defines={ | ||||
|                                             'BLOCK_WIDTH': self.block_size[0],  | ||||
|                                             'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                         },  | ||||
|                                         compile_args={ | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts,  | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("FORCEKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii") | ||||
|      | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [h0, hu0, hv0]) | ||||
|         self.u1 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [None, None, None]) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [h0, hu0, hv0]) | ||||
|         self.u1 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt == None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0))) | ||||
|             self.dt = min(dt_x, dt_y) | ||||
|         else: | ||||
|             self.dt = dt | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|          | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 self.nx, self.ny) | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
| 
 | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|          | ||||
|     def getOutput(self): | ||||
| 
 | ||||
|     def get_output(self): | ||||
|         return self.u0 | ||||
|          | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|                  | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         return max_dt | ||||
|  | ||||
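| # Illustrative sketch (not part of the original commit): a shallow-water | ||||
| # setup for FORCE. h0, hu0, hv0 are hypothetical (ny, nx) float32 arrays | ||||
| # (e.g. a dam break); dt is derived from the CFL condition when not given. | ||||
| # Stepping via substep() mirrors the method defined above; a full run would | ||||
| # normally go through BaseSimulator's driver routines. | ||||
| # | ||||
| #   sim = FORCE(context, h0, hu0, hv0, nx, ny, dx, dy, g=9.81) | ||||
| #   for _ in range(100): | ||||
| #       sim.substep(sim.dt, 0) | ||||
| #   h, hu, hv = sim.get_output().download(sim.stream) | ||||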
| @ -19,30 +19,31 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators import Simulator | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| class HLL (Simulator.BaseSimulator): | ||||
| 
 | ||||
| class HLL(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the Harten-Lax-van Leer approximate Riemann solver | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self,  | ||||
|     def __init__(self, | ||||
|                  context, | ||||
|                  h0, hu0, hv0,  | ||||
|                  nx, ny,  | ||||
|                  dx, dy,  | ||||
|                  g,  | ||||
|                  h0, hu0, hv0, | ||||
|                  nx, ny, | ||||
|                  dx, dy, | ||||
|                  g, | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=16, | ||||
|                  dt: float=None, | ||||
|                  compile_opts: list[str]=[]): | ||||
|                  dt: float = None, | ||||
|                  compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
| 
 | ||||
| @ -58,74 +59,74 @@ class HLL (Simulator.BaseSimulator): | ||||
|             g: Gravitational acceleration (9.81 m/s^2) | ||||
|             compile_opts: Pass a list of nvcc compiler options | ||||
|         """ | ||||
|                   | ||||
|         # Call super constructor | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy,  | ||||
|             boundary_conditions, | ||||
|             cfl_scale, | ||||
|             1, | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)  | ||||
| 
 | ||||
|         #Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_HLL.cu",  | ||||
|                                         defines={ | ||||
|                                             'BLOCK_WIDTH': self.block_size[0],  | ||||
|                                             'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                         },  | ||||
|                                         compile_args={ | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts,  | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         # Call super constructor | ||||
|         super().__init__(context, | ||||
|                          nx, ny, | ||||
|                          dx, dy, | ||||
|                          boundary_conditions, | ||||
|                          cfl_scale, | ||||
|                          1, | ||||
|                          block_width, block_height) | ||||
|         self.g = np.float32(g) | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_HLL.cu", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("HLLKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii") | ||||
|      | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [h0, hu0, hv0]) | ||||
|         self.u1 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [None, None, None]) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [h0, hu0, hv0]) | ||||
|         self.u1 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|         if dt == None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0))) | ||||
|             self.dt = min(dt_x, dt_y) | ||||
|         else: | ||||
|             self.dt = dt | ||||
|          | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|          | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 self.nx, self.ny) | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|          | ||||
|     def getOutput(self): | ||||
| 
 | ||||
|     def get_output(self): | ||||
|         return self.u0 | ||||
|                          | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5 | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
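The initial timestep chosen above when dt is not given is a CFL-type estimate for the shallow-water system: the fastest signal speed in each direction is |u| + sqrt(g*h), so dt is bounded by the cell size divided by that speed. A minimal standalone sketch of the same estimate (the grid size and field values below are illustrative, not taken from the repository):

import numpy as np

g = 9.81
dx = dy = 100.0                          # cell size in metres (illustrative)
h0 = np.full((64, 64), 10.0)             # water depth
hu0 = np.full((64, 64), 5.0)             # depth-integrated x-momentum, i.e. u = 0.5 m/s
hv0 = np.zeros((64, 64))                 # depth-integrated y-momentum

# Same formula as in the constructors: limit dt by the fastest wave in each direction
dt_x = np.min(dx / (np.abs(hu0 / h0) + np.sqrt(g * h0)))
dt_y = np.min(dy / (np.abs(hv0 / h0) + np.sqrt(g * h0)))
dt = min(dt_x, dt_y)                     # roughly 9.6 s for these numbers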
| @ -19,31 +19,32 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators import Simulator | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| class HLL2 (Simulator.BaseSimulator): | ||||
| 
 | ||||
| class HLL2(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the dimensionally split second-order HLL (HLL2) scheme | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self,  | ||||
|                  context,  | ||||
|                  h0, hu0, hv0,  | ||||
|                  nx, ny,  | ||||
|                  dx, dy,  | ||||
|                  g,  | ||||
|                  theta=1.8,  | ||||
|     def __init__(self, | ||||
|                  context, | ||||
|                  h0, hu0, hv0, | ||||
|                  nx, ny, | ||||
|                  dx, dy, | ||||
|                  g, | ||||
|                  theta=1.8, | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=16, | ||||
|                  dt: float=None, | ||||
|                  compile_opts: list[str]=[]): | ||||
|                  dt: float = None, | ||||
|                  compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
| 
 | ||||
| @ -59,81 +60,81 @@ class HLL2 (Simulator.BaseSimulator): | ||||
|             g: Gravitational acceleration (9.81 m/s^2) | ||||
|             compile_opts: Pass a list of nvcc compiler options | ||||
|         """ | ||||
|                   | ||||
| 
 | ||||
|         # Call super constructor | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy,  | ||||
|             boundary_conditions, | ||||
|             cfl_scale, | ||||
|             2, | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)  | ||||
|         super().__init__(context, | ||||
|                          nx, ny, | ||||
|                          dx, dy, | ||||
|                          boundary_conditions, | ||||
|                          cfl_scale, | ||||
|                          2, | ||||
|                          block_width, block_height) | ||||
|         self.g = np.float32(g) | ||||
|         self.theta = np.float32(theta) | ||||
|          | ||||
|         #Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_HLL2.cu",  | ||||
|                                         defines={ | ||||
|                                             'BLOCK_WIDTH': self.block_size[0],  | ||||
|                                             'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                         },  | ||||
|                                         compile_args={ | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts,  | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_HLL2.cu", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("HLL2Kernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii") | ||||
|          | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [h0, hu0, hv0]) | ||||
|         self.u1 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [None, None, None]) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [h0, hu0, hv0]) | ||||
|         self.u1 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|          | ||||
|         if dt == None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0))) | ||||
|             self.dt = min(dt_x, dt_y) | ||||
|         else: | ||||
|             self.dt = dt | ||||
|          | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|          | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.substepDimsplit(dt*0.5, step_number) | ||||
|                  | ||||
|     def substepDimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.theta,  | ||||
|                 substep, | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 self.nx, self.ny) | ||||
|         self.substep_dimsplit(dt * 0.5, step_number) | ||||
| 
 | ||||
|     def substep_dimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.theta, | ||||
|                                         substep, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|      | ||||
|     def getOutput(self): | ||||
| 
 | ||||
|     def get_output(self): | ||||
|         return self.u0 | ||||
|          | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5 | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
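For orientation, a minimal hand-rolled stepping loop over the methods visible in this diff could look as follows. It assumes `sim` is an already constructed HLL2 instance; the real BaseSimulator (not shown in this diff) most likely provides its own stepping/simulate loop.

t, t_end = 0.0, 3600.0
while t < t_end:
    dt = float(sim.compute_dt())     # CFL-limited dt from the device-side reduction
    for step_number in range(2):     # dimensionally split scheme: two substeps per full step
        sim.substep(dt, step_number)
    t += dt
sim.check()                          # sanity-check both state buffers
state = sim.get_output()             # ArakawaA2D holding [h, hu, hv] on the device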
| @ -26,12 +26,12 @@ from IPython.core import magic_arguments | ||||
| from IPython.core.magic import line_magic, Magics, magics_class | ||||
| import pycuda.driver as cuda | ||||
| 
 | ||||
| from GPUSimulators import Common | ||||
| from GPUSimulators.common import IPEngine | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| 
 | ||||
| 
 | ||||
| @magics_class | ||||
| class MagicCudaContext(Magics):  | ||||
| class MagicCudaContext(Magics): | ||||
|     @line_magic | ||||
|     @magic_arguments.magic_arguments() | ||||
|     @magic_arguments.argument( | ||||
| @ -44,14 +44,14 @@ class MagicCudaContext(Magics): | ||||
|         '--no_autotuning', '-na', action="store_true", help='Disable autotuning of kernels') | ||||
|     def cuda_context_handler(self, line): | ||||
|         args = magic_arguments.parse_argstring(self.cuda_context_handler, line) | ||||
|         self.logger =  logging.getLogger(__name__) | ||||
|          | ||||
|         self.logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|         self.logger.info("Registering %s in user workspace", args.name) | ||||
|          | ||||
| 
 | ||||
|         context_flags = None | ||||
|         if (args.blocking): | ||||
|         if args.blocking: | ||||
|             context_flags = cuda.ctx_flags.SCHED_BLOCKING_SYNC | ||||
|          | ||||
| 
 | ||||
|         if args.name in self.shell.user_ns.keys(): | ||||
|             self.logger.debug("Context already registered! Ignoring") | ||||
|             return | ||||
| @ -59,12 +59,13 @@ class MagicCudaContext(Magics): | ||||
|             self.logger.debug("Creating context") | ||||
|             use_cache = False if args.no_cache else True | ||||
|             use_autotuning = False if args.no_autotuning else True | ||||
|             self.shell.user_ns[args.name] = CudaContext.CudaContext(context_flags=context_flags, use_cache=use_cache, autotuning=use_autotuning) | ||||
|          | ||||
|             self.shell.user_ns[args.name] = CudaContext( | ||||
|                 context_flags=context_flags, use_cache=use_cache, autotuning=use_autotuning) | ||||
| 
 | ||||
|         # this function will be called on exceptions in any cell | ||||
|         def custom_exc(shell, etype, evalue, tb, tb_offset=None): | ||||
|             self.logger.exception("Exception caught: Resetting to CUDA context %s", args.name) | ||||
|             while (cuda.Context.get_current() != None): | ||||
|             while cuda.Context.get_current() is not None: | ||||
|                 context = cuda.Context.get_current() | ||||
|                 self.logger.info("Popping <%s>", str(context.handle)) | ||||
|                 cuda.Context.pop() | ||||
| @ -77,36 +78,30 @@ class MagicCudaContext(Magics): | ||||
|                 self.logger.error("CUDA will not work now") | ||||
| 
 | ||||
|             self.logger.debug("==================================================================") | ||||
|              | ||||
| 
 | ||||
|             # still show the error within the notebook, don't just swallow it | ||||
|             shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset) | ||||
| 
 | ||||
|         # this registers a custom exception handler for the whole current notebook | ||||
|         get_ipython().set_custom_exc((Exception,), custom_exc) | ||||
|          | ||||
|          | ||||
| 
 | ||||
|         # Handle CUDA context when exiting python | ||||
|         import atexit | ||||
|         def exitfunc(): | ||||
|             self.logger.info("Exitfunc: Resetting CUDA context stack") | ||||
|             while (cuda.Context.get_current() != None): | ||||
|             while cuda.Context.get_current() is not None: | ||||
|                 context = cuda.Context.get_current() | ||||
|                 self.logger.info("`-> Popping <%s>", str(context.handle)) | ||||
|                 cuda.Context.pop() | ||||
|             self.logger.debug("==================================================================") | ||||
| 
 | ||||
|         atexit.register(exitfunc) | ||||
|          | ||||
|          | ||||
|          | ||||
|          | ||||
|          | ||||
|          | ||||
|          | ||||
|          | ||||
| 
 | ||||
| 
 | ||||
| @magics_class | ||||
| class MagicLogger(Magics):  | ||||
| class MagicLogger(Magics): | ||||
|     logger_initialized = False | ||||
|      | ||||
| 
 | ||||
|     @line_magic | ||||
|     @magic_arguments.magic_arguments() | ||||
|     @magic_arguments.argument( | ||||
| @ -118,51 +113,47 @@ class MagicLogger(Magics): | ||||
|     @magic_arguments.argument( | ||||
|         '--file_level', '-f', type=int, default=10, help='The level of logging to file [0, 50]') | ||||
|     def setup_logging(self, line): | ||||
|         if (self.logger_initialized): | ||||
|         if self.logger_initialized: | ||||
|             logging.getLogger('GPUSimulators').info("Global logger already initialized!") | ||||
|             return; | ||||
|             return | ||||
|         else: | ||||
|             self.logger_initialized = True | ||||
|              | ||||
| 
 | ||||
|             args = magic_arguments.parse_argstring(self.setup_logging, line) | ||||
|             import sys | ||||
|              | ||||
|             #Get root logger | ||||
| 
 | ||||
|             # Get root logger | ||||
|             logger = logging.getLogger('GPUSimulators') | ||||
|             logger.setLevel(min(args.level, args.file_level)) | ||||
| 
 | ||||
|             #Add log to screen | ||||
|             # Add log to screen | ||||
|             ch = logging.StreamHandler() | ||||
|             ch.setLevel(args.level) | ||||
|             logger.addHandler(ch) | ||||
|             logger.log(args.level, "Console logger using level %s", logging.getLevelName(args.level)) | ||||
|              | ||||
|             #Get the outfilename (try to evaluate if Python expression...) | ||||
| 
 | ||||
|             # Get the outfilename (try to evaluate if Python expression...) | ||||
|             try: | ||||
|                 outfile = eval(args.out, self.shell.user_global_ns, self.shell.user_ns) | ||||
|             except: | ||||
|                 outfile = args.out | ||||
|              | ||||
|             #Add log to file | ||||
| 
 | ||||
|             # Add log to file | ||||
|             logger.log(args.level, "File logger using level %s to %s", logging.getLevelName(args.file_level), outfile) | ||||
|              | ||||
| 
 | ||||
|             fh = logging.FileHandler(outfile) | ||||
|             formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s: %(message)s') | ||||
|             fh.setFormatter(formatter) | ||||
|             fh.setLevel(args.file_level) | ||||
|             logger.addHandler(fh) | ||||
|          | ||||
| 
 | ||||
|         logger.info("Python version %s", sys.version) | ||||
|         self.shell.user_ns[args.name] = logger | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|          | ||||
| 
 | ||||
| 
 | ||||
| @magics_class | ||||
| class MagicMPI(Magics):  | ||||
|      | ||||
| class MagicMPI(Magics): | ||||
| 
 | ||||
|     @line_magic | ||||
|     @magic_arguments.magic_arguments() | ||||
|     @magic_arguments.argument( | ||||
| @ -177,13 +168,7 @@ class MagicMPI(Magics): | ||||
|             self.shell.user_ns[args.name].shutdown() | ||||
|             self.shell.user_ns[args.name] = None | ||||
|             gc.collect() | ||||
|         self.shell.user_ns[args.name] = Common.IPEngine(args.num_engines) | ||||
| 
 | ||||
|          | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|         self.shell.user_ns[args.name] = IPEngine(args.num_engines) | ||||
| 
 | ||||
| 
 | ||||
| # Register  | ||||
| @ -191,4 +176,3 @@ ip = get_ipython() | ||||
| ip.register_magics(MagicCudaContext) | ||||
| ip.register_magics(MagicLogger) | ||||
| ip.register_magics(MagicMPI) | ||||
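Once imported in a notebook, the registered magics are invoked roughly as below; the positional name argument and anything beyond the flags visible in this diff are assumptions.

%cuda_context_handler --no_autotuning my_cuda_context
%setup_logging --file_level 20 my_logger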
| 
 | ||||
|  | ||||
| @ -24,32 +24,33 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators import Simulator | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| class KP07 (Simulator.BaseSimulator): | ||||
| 
 | ||||
| class KP07(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the Kurganov-Petrova 2007 (KP07) central-upwind scheme | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self,  | ||||
|                  context,  | ||||
|                  h0, hu0, hv0,  | ||||
|                  nx, ny,  | ||||
|                  dx, dy,  | ||||
|                  g,  | ||||
|                  theta=1.3,  | ||||
|     def __init__(self, | ||||
|                  context, | ||||
|                  h0, hu0, hv0, | ||||
|                  nx, ny, | ||||
|                  dx, dy, | ||||
|                  g, | ||||
|                  theta=1.3, | ||||
|                  cfl_scale=0.9, | ||||
|                  order=2, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=16, | ||||
|                  dt: float=None, | ||||
|                  compile_opts: list[str]=[]): | ||||
|                  dt: float = None, | ||||
|                  compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
|          | ||||
| @ -65,84 +66,82 @@ class KP07 (Simulator.BaseSimulator): | ||||
|             g: Gravitational acceleration (9.81 m/s^2) | ||||
|             compile_opts: Pass a list of nvcc compiler options | ||||
|         """ | ||||
|                   | ||||
| 
 | ||||
|         # Call super constructor | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy,  | ||||
|             boundary_conditions, | ||||
|             cfl_scale, | ||||
|             order, | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)              | ||||
|         self.theta = np.float32(theta)  | ||||
|         super().__init__(context, | ||||
|                          nx, ny, | ||||
|                          dx, dy, | ||||
|                          boundary_conditions, | ||||
|                          cfl_scale, | ||||
|                          order, | ||||
|                          block_width, block_height) | ||||
|         self.g = np.float32(g) | ||||
|         self.theta = np.float32(theta) | ||||
|         self.order = np.int32(order) | ||||
| 
 | ||||
|         #Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_KP07.cu",  | ||||
|                                         defines={ | ||||
|                                             'BLOCK_WIDTH': self.block_size[0],  | ||||
|                                             'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                         },  | ||||
|                                         compile_args={ | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts,  | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         # Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_KP07.cu", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07Kernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii") | ||||
|          | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [h0, hu0, hv0]) | ||||
|         self.u1 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [None, None, None]) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [h0, hu0, hv0]) | ||||
|         self.u1 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|          | ||||
|         if dt == None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0))) | ||||
|             self.dt = min(dt_x, dt_y) | ||||
|         else: | ||||
|             self.dt = dt | ||||
|          | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|                          | ||||
|          | ||||
|     def substep(self, dt, step_number): | ||||
|             self.substepRK(dt, step_number) | ||||
| 
 | ||||
|          | ||||
|     def substepRK(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.theta,  | ||||
|                 Simulator.stepOrderToCodedInt(step=substep, order=self.order),  | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 self.nx, self.ny) | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.substep_rk(dt, step_number) | ||||
| 
 | ||||
|     def substep_rk(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.theta, | ||||
|                                         Simulator.step_order_to_coded_int(step=substep, order=self.order), | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def getOutput(self): | ||||
|     def get_output(self): | ||||
|         return self.u0 | ||||
|          | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5**(self.order-1) | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
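        # The safety factor shrinks with the scheme order: 0.5**(order-1) is 1.0 for
        # order=1 and 0.5 for order=2 (two Runge-Kutta substeps per timestep).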
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         return max_dt * 0.5 ** (self.order - 1) | ||||
|  | ||||
| @ -24,31 +24,32 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators import Simulator | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| 
 | ||||
| class KP07_dimsplit(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the dimensionally split KP07 scheme | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self,  | ||||
|                  context,  | ||||
|                  h0, hu0, hv0,  | ||||
|                  nx, ny,  | ||||
|                  dx, dy,  | ||||
|                  g,  | ||||
|                  theta=1.3,  | ||||
|     def __init__(self, | ||||
|                  context, | ||||
|                  h0, hu0, hv0, | ||||
|                  nx, ny, | ||||
|                  dx, dy, | ||||
|                  g, | ||||
|                  theta=1.3, | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=16, | ||||
|                  dt: float=None, | ||||
|                  compile_opts: list[str]=[]): | ||||
|                  dt: float = None, | ||||
|                  compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
|          | ||||
| @ -64,83 +65,83 @@ class KP07_dimsplit(Simulator.BaseSimulator): | ||||
|             g: Gravitational acceleration (9.81 m/s^2) | ||||
|             compile_opts: Pass a list of nvcc compiler options | ||||
|         """ | ||||
|                   | ||||
| 
 | ||||
|         # Call super constructor | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy,  | ||||
|             boundary_conditions, | ||||
|             cfl_scale, | ||||
|             2,  | ||||
|             block_width, block_height) | ||||
|         super().__init__(context, | ||||
|                          nx, ny, | ||||
|                          dx, dy, | ||||
|                          boundary_conditions, | ||||
|                          cfl_scale, | ||||
|                          2, | ||||
|                          block_width, block_height) | ||||
|         self.gc_x = 2 | ||||
|         self.gc_y = 2 | ||||
|         self.g = np.float32(g) | ||||
|         self.theta = np.float32(theta) | ||||
| 
 | ||||
|         #Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_KP07_dimsplit.cu",  | ||||
|                                         defines={ | ||||
|                                             'BLOCK_WIDTH': self.block_size[0],  | ||||
|                                             'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                         },  | ||||
|                                         compile_args={ | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts,  | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         # Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_KP07_dimsplit.cu", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("KP07DimsplitKernel") | ||||
|         self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii") | ||||
|      | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         self.gc_x, self.gc_y,  | ||||
|                         [h0, hu0, hv0]) | ||||
|         self.u1 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         self.gc_x, self.gc_y,  | ||||
|                         [None, None, None]) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              self.gc_x, self.gc_y, | ||||
|                              [h0, hu0, hv0]) | ||||
|         self.u1 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              self.gc_x, self.gc_y, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt == None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0))) | ||||
|             self.dt = min(dt_x, dt_y) | ||||
|         else: | ||||
|             self.dt = dt | ||||
|          | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|      | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.substepDimsplit(dt*0.5, step_number) | ||||
|      | ||||
|     def substepDimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.theta,  | ||||
|                 substep,  | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 self.nx, self.ny) | ||||
|         self.substep_dimsplit(dt * 0.5, step_number) | ||||
| 
 | ||||
|     def substep_dimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.theta, | ||||
|                                         substep, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def getOutput(self): | ||||
|     def get_output(self): | ||||
|         return self.u0 | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
| 
 | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
| @ -20,16 +20,17 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators import Simulator | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| class LxF (Simulator.BaseSimulator): | ||||
| 
 | ||||
| class LxF(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the Lax-Friedrichs scheme | ||||
|     """ | ||||
| @ -40,11 +41,11 @@ class LxF (Simulator.BaseSimulator): | ||||
|                  nx: int, ny: int, | ||||
|                  dx: int, dy: int, | ||||
|                  g: float, | ||||
|                  cfl_scale: float=0.9, | ||||
|                  cfl_scale: float = 0.9, | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width: int=16, block_height: int=16, | ||||
|                  dt: float=None, | ||||
|                  compile_opts: list[str]=[]): | ||||
|                  block_width: int = 16, block_height: int = 16, | ||||
|                  dt: float = None, | ||||
|                  compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
| 
 | ||||
| @ -60,80 +61,80 @@ class LxF (Simulator.BaseSimulator): | ||||
|             g: Gravitational acceleration (9.81 m/s^2) | ||||
|             compile_opts: Pass a list of nvcc compiler options | ||||
|         """ | ||||
|                   | ||||
| 
 | ||||
|         # Call super constructor | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy,  | ||||
|             boundary_conditions, | ||||
|             cfl_scale, | ||||
|             1, | ||||
|             block_width, block_height) | ||||
|         self.g = np.float32(g)  | ||||
|         super().__init__(context, | ||||
|                          nx, ny, | ||||
|                          dx, dy, | ||||
|                          boundary_conditions, | ||||
|                          cfl_scale, | ||||
|                          1, | ||||
|                          block_width, block_height) | ||||
|         self.g = np.float32(g) | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_LxF.cu",  | ||||
|                                         defines={ | ||||
|                                             'BLOCK_WIDTH': self.block_size[0],  | ||||
|                                             'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                         },  | ||||
|                                         compile_args={ | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts,  | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         module = context.get_module("cuda/SWE2D_LxF.cu", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("LxFKernel") | ||||
|         self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii") | ||||
| 
 | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [h0, hu0, hv0]) | ||||
|         self.u1 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         1, 1,  | ||||
|                         [None, None, None]) | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [h0, hu0, hv0]) | ||||
|         self.u1 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              1, 1, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
| 
 | ||||
|         if dt == None:   | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))  | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0))) | ||||
|             self.dt = min(dt_x, dt_y) | ||||
|         else: | ||||
|             self.dt = dt | ||||
|          | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|          | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         """ | ||||
|         Args: | ||||
|             dt: Size of each timestep (seconds) | ||||
|         """ | ||||
|          | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 self.nx, self.ny) | ||||
| 
 | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
|    | ||||
|     def getOutput(self): | ||||
| 
 | ||||
|     def get_output(self): | ||||
|         return self.u0 | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5 | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
| @ -222,7 +222,7 @@ class MPISimulator(Simulator.BaseSimulator): | ||||
|          | ||||
|         autotuner = sim.context.autotuner | ||||
|         sim.context.autotuner = None; | ||||
|         boundary_conditions = sim.getBoundaryConditions() | ||||
|         boundary_conditions = sim.get_boundary_conditions() | ||||
|         super().__init__(sim.context,  | ||||
|             sim.nx, sim.ny,  | ||||
|             sim.dx, sim.dy,  | ||||
| @ -263,14 +263,14 @@ class MPISimulator(Simulator.BaseSimulator): | ||||
|         if (gj == grid.grid[1]-1 and boundary_conditions.north != Simulator.BoundaryCondition.Type.Periodic): | ||||
|             self.north = None | ||||
|             new_boundary_conditions.north = boundary_conditions.north; | ||||
|         sim.setBoundaryConditions(new_boundary_conditions) | ||||
|         sim.set_boundary_conditions(new_boundary_conditions) | ||||
|                  | ||||
|         #Get number of variables | ||||
|         self.nvars = len(self.getOutput().gpu_variables) | ||||
|         self.nvars = len(self.get_output().gpu_variables) | ||||
|          | ||||
|         #Shorthands for computing extents and sizes | ||||
|         gc_x = int(self.sim.getOutput()[0].x_halo) | ||||
|         gc_y = int(self.sim.getOutput()[0].y_halo) | ||||
|         gc_x = int(self.sim.get_output()[0].x_halo) | ||||
|         gc_y = int(self.sim.get_output()[0].y_halo) | ||||
|         nx = int(self.sim.nx) | ||||
|         ny = int(self.sim.ny) | ||||
|          | ||||
| @ -322,7 +322,7 @@ class MPISimulator(Simulator.BaseSimulator): | ||||
|         #nvtx.mark("substep full", color="blue") | ||||
|         #self.sim.substep(dt, step_number, external=True, internal=True) | ||||
| 
 | ||||
|         self.sim.swapBuffers() | ||||
|         self.sim.swap_buffers() | ||||
| 
 | ||||
|         self.profiling_data_mpi["end"]["t_mpi_step"] += time.time() | ||||
|          | ||||
| @ -336,8 +336,8 @@ class MPISimulator(Simulator.BaseSimulator): | ||||
|          | ||||
|         self.profiling_data_mpi["n_time_steps"] += 1 | ||||
| 
 | ||||
|     def getOutput(self): | ||||
|         return self.sim.getOutput() | ||||
|     def get_output(self): | ||||
|         return self.sim.get_output() | ||||
|          | ||||
|     def synchronize(self): | ||||
|         self.sim.synchronize() | ||||
| @ -345,14 +345,14 @@ class MPISimulator(Simulator.BaseSimulator): | ||||
|     def check(self): | ||||
|         return self.sim.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         local_dt = np.array([np.float32(self.sim.computeDt())]); | ||||
|     def compute_dt(self): | ||||
|         local_dt = np.array([np.float32(self.sim.compute_dt())]) | ||||
|         global_dt = np.empty(1, dtype=np.float32) | ||||
|         self.grid.comm.Allreduce(local_dt, global_dt, op=MPI.MIN) | ||||
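        # MIN-reduce the local CFL-limited dt across all ranks so every subdomain
        # advances with the same, globally safe timestep.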
|         self.logger.debug("Local dt: {:f}, global dt: {:f}".format(local_dt[0], global_dt[0])) | ||||
|         return global_dt[0] | ||||
|          | ||||
|     def getExtent(self): | ||||
|     def get_extent(self): | ||||
|         """ | ||||
|         Function which returns the extent of node with rank  | ||||
|         rank in the grid | ||||
|  | ||||
| @ -45,7 +45,7 @@ class SHMEMSimulator(Simulator.BaseSimulator): | ||||
|         # This would also eliminate the need for all the array bookkeeping in this class. | ||||
|         autotuner = sims[0].context.autotuner | ||||
|         sims[0].context.autotuner = None | ||||
|         boundary_conditions = sims[0].getBoundaryConditions() | ||||
|         boundary_conditions = sims[0].get_boundary_conditions() | ||||
|         super().__init__(sims[0].context,  | ||||
|             sims[0].nx, sims[0].ny,  | ||||
|             sims[0].dx, sims[0].dy,  | ||||
| @ -108,14 +108,14 @@ class SHMEMSimulator(Simulator.BaseSimulator): | ||||
|             if (gj == grid.grid[1]-1 and boundary_conditions.north != Simulator.BoundaryCondition.Type.Periodic): | ||||
|                 self.north = None | ||||
|                 new_boundary_conditions.north = boundary_conditions.north; | ||||
|             sim.setBoundaryConditions(new_boundary_conditions) | ||||
|             sim.set_boundary_conditions(new_boundary_conditions) | ||||
|                      | ||||
|             #Get number of variables | ||||
|             self.nvars[i] = len(sim.getOutput().gpu_variables) | ||||
|             self.nvars[i] = len(sim.get_output().gpu_variables) | ||||
|              | ||||
|             #Shorthands for computing extents and sizes | ||||
|             gc_x = int(sim.getOutput()[0].x_halo) | ||||
|             gc_y = int(sim.getOutput()[0].y_halo) | ||||
|             gc_x = int(sim.get_output()[0].x_halo) | ||||
|             gc_y = int(sim.get_output()[0].y_halo) | ||||
|             nx = int(sim.nx) | ||||
|             ny = int(sim.ny) | ||||
|              | ||||
| @ -150,10 +150,10 @@ class SHMEMSimulator(Simulator.BaseSimulator): | ||||
|         for i, sim in enumerate(self.sims): | ||||
|             sim.substep(dt, step_number) | ||||
|      | ||||
|     def getOutput(self): | ||||
|     def get_output(self): | ||||
|         # XXX: Does not return what we would expect. | ||||
|         # Returns first subdomain, but we want the whole domain. | ||||
|         return self.sims[0].getOutput()  | ||||
|         return self.sims[0].get_output() | ||||
|          | ||||
|     def synchronize(self): | ||||
|         for sim in self.sims: | ||||
| @ -164,14 +164,14 @@ class SHMEMSimulator(Simulator.BaseSimulator): | ||||
|         # Checks only first subdomain, but we want to check the whole domain. | ||||
|         return self.sims[0].check() | ||||
|      | ||||
|     def computeDt(self): | ||||
|     def compute_dt(self): | ||||
|         global_dt = float("inf") | ||||
| 
 | ||||
|         for sim in self.sims: | ||||
|             sim.context.synchronize() | ||||
| 
 | ||||
|         for sim in self.sims: | ||||
|             local_dt = sim.computeDt() | ||||
|             local_dt = sim.compute_dt() | ||||
|             if local_dt < global_dt: | ||||
|                 global_dt = local_dt | ||||
|             self.logger.debug("Local dt: {:f}".format(local_dt)) | ||||
| @ -179,7 +179,7 @@ class SHMEMSimulator(Simulator.BaseSimulator): | ||||
|         self.logger.debug("Global dt: {:f}".format(global_dt)) | ||||
|         return global_dt | ||||
|          | ||||
|     def getExtent(self, index=0): | ||||
|     def get_extent(self, index=0): | ||||
|         """ | ||||
|         Function which returns the extent of the subdomain with index  | ||||
|         index in the grid | ||||
|  | ||||
| @ -62,8 +62,8 @@ class SHMEMGrid(object): | ||||
| 
 | ||||
|         for i in range(self.ngpus): | ||||
|             # XXX: disabled for testing on single-GPU system | ||||
|             #self.cuda_contexts.append(CudaContext.CudaContext(device=i, autotuning=False)) | ||||
|             self.cuda_contexts.append(CudaContext.CudaContext(device=0, autotuning=False)) | ||||
|             #self.cuda_contexts.append(CudaContext(device=i, autotuning=False)) | ||||
|             self.cuda_contexts.append(CudaContext(device=0, autotuning=False)) | ||||
| 
 | ||||
|     def getCoordinate(self, index): | ||||
|         i = (index  % self.grid[0]) | ||||
| @ -180,7 +180,7 @@ class SHMEMSimulatorGroup(object): | ||||
|          | ||||
|         autotuner = sims[0].context.autotuner | ||||
|         sims[0].context.autotuner = None | ||||
|         boundary_conditions = sims[0].getBoundaryConditions() | ||||
|         boundary_conditions = sims[0].get_boundary_conditions() | ||||
|         super().__init__(sims[0].context,  | ||||
|             sims[0].nx, sims[0].ny,  | ||||
|             sims[0].dx, sims[0].dy,  | ||||
| @ -243,14 +243,14 @@ class SHMEMSimulatorGroup(object): | ||||
|             if (gj == grid.grid[1]-1 and boundary_conditions.north != Simulator.BoundaryCondition.Type.Periodic): | ||||
|                 self.north = None | ||||
|                 new_boundary_conditions.north = boundary_conditions.north; | ||||
|             sim.setBoundaryConditions(new_boundary_conditions) | ||||
|             sim.set_boundary_conditions(new_boundary_conditions) | ||||
|                      | ||||
|             #Get number of variables | ||||
|             self.nvars[i] = len(sim.getOutput().gpu_variables) | ||||
|             self.nvars[i] = len(sim.get_output().gpu_variables) | ||||
|              | ||||
|             #Shorthands for computing extents and sizes | ||||
|             gc_x = int(sim.getOutput()[0].x_halo) | ||||
|             gc_y = int(sim.getOutput()[0].y_halo) | ||||
|             gc_x = int(sim.get_output()[0].x_halo) | ||||
|             gc_y = int(sim.get_output()[0].y_halo) | ||||
|             nx = int(sim.nx) | ||||
|             ny = int(sim.ny) | ||||
|              | ||||
| @ -287,7 +287,7 @@ class SHMEMSimulatorGroup(object): | ||||
|     def getOutput(self): | ||||
|         # XXX: Does not return what we would expect. | ||||
|         # Returns first subdomain, but we want the whole domain. | ||||
|         return self.sims[0].getOutput()  | ||||
|         return self.sims[0].get_output() | ||||
|          | ||||
|     def synchronize(self): | ||||
|         for sim in self.sims: | ||||
| @ -305,7 +305,7 @@ class SHMEMSimulatorGroup(object): | ||||
|             sim.context.synchronize() | ||||
| 
 | ||||
|         for sim in self.sims: | ||||
|             local_dt = sim.computeDt() | ||||
|             local_dt = sim.compute_dt() | ||||
|             if local_dt < global_dt: | ||||
|                 global_dt = local_dt | ||||
|             self.logger.debug("Local dt: {:f}".format(local_dt)) | ||||
|  | ||||
| @ -20,18 +20,38 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| import logging | ||||
| from enum import IntEnum | ||||
| 
 | ||||
| import pycuda.driver as cuda | ||||
| 
 | ||||
| from GPUSimulators import Common | ||||
| from GPUSimulators.common import ProgressPrinter | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| 
 | ||||
| 
 | ||||
| class BoundaryCondition(object):     | ||||
| def get_types(bc): | ||||
|     types = {'north': BoundaryCondition.Type((bc >> 24) & 0x0000000F), | ||||
|              'south': BoundaryCondition.Type((bc >> 16) & 0x0000000F), | ||||
|              'east': BoundaryCondition.Type((bc >> 8) & 0x0000000F), | ||||
|              'west': BoundaryCondition.Type((bc >> 0) & 0x0000000F)} | ||||
|     return types | ||||
| 
 | ||||
| 
 | ||||
| def step_order_to_coded_int(step, order): | ||||
|     """ | ||||
|     Helper function which packs the step and order into a single integer | ||||
|     """ | ||||
| 
 | ||||
|     step_order = (step << 16) | (order & 0x0000ffff) | ||||
|     # print("Step:  {0:032b}".format(step)) | ||||
|     # print("Order: {0:032b}".format(order)) | ||||
|     # print("Mix:   {0:032b}".format(step_order)) | ||||
|     return np.int32(step_order) | ||||
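For reference, a minimal decoding sketch (editorial, not part of this commit) showing the inverse of the packing above; the CUDA kernels are assumed to unpack the two 16-bit fields in the same way:

    import numpy as np

    def decode_step_order(step_order):
        # Inverse of step_order_to_coded_int: the upper 16 bits hold the step,
        # the lower 16 bits hold the order.
        value = int(step_order)
        return (value >> 16) & 0x0000ffff, value & 0x0000ffff

    coded = np.int32((1 << 16) | (2 & 0x0000ffff))  # step=1, order=2
    assert decode_step_order(coded) == (1, 2)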
| 
 | ||||
| 
 | ||||
| class BoundaryCondition(object): | ||||
|     """ | ||||
|     Class for holding boundary conditions for global boundaries | ||||
|     """ | ||||
| @ -47,12 +67,7 @@ class BoundaryCondition(object): | ||||
|         Periodic = 2, | ||||
|         Reflective = 3 | ||||
| 
 | ||||
|     def __init__(self, types={  | ||||
|                     'north': Type.Reflective,  | ||||
|                     'south': Type.Reflective,  | ||||
|                     'east': Type.Reflective,  | ||||
|                     'west': Type.Reflective  | ||||
|                  }): | ||||
|     def __init__(self, types: dict[str, Type]): | ||||
|         """ | ||||
|         Constructor | ||||
|         """ | ||||
| @ -61,17 +76,18 @@ class BoundaryCondition(object): | ||||
|         self.south = types['south'] | ||||
|         self.east = types['east'] | ||||
|         self.west = types['west'] | ||||
|          | ||||
|         if (self.north == BoundaryCondition.Type.Neumann \ | ||||
|                 or self.south == BoundaryCondition.Type.Neumann \ | ||||
|                 or self.east == BoundaryCondition.Type.Neumann \ | ||||
|                 or self.west == BoundaryCondition.Type.Neumann): | ||||
|             raise(NotImplementedError("Neumann boundary condition not supported")) | ||||
|              | ||||
|     def __str__(self): | ||||
|         return  '[north={:s}, south={:s}, east={:s}, west={:s}]'.format(str(self.north), str(self.south), str(self.east), str(self.west)) | ||||
| 
 | ||||
|     def asCodedInt(self): | ||||
|         if (self.north == BoundaryCondition.Type.Neumann | ||||
|                 or self.south == BoundaryCondition.Type.Neumann | ||||
|                 or self.east == BoundaryCondition.Type.Neumann | ||||
|                 or self.west == BoundaryCondition.Type.Neumann): | ||||
|             raise (NotImplementedError("Neumann boundary condition not supported")) | ||||
| 
 | ||||
|     def __str__(self): | ||||
|         return '[north={:s}, south={:s}, east={:s}, west={:s}]'.format(str(self.north), str(self.south), str(self.east), | ||||
|                                                                        str(self.west)) | ||||
| 
 | ||||
|     def as_coded_int(self): | ||||
|         """ | ||||
|         Helper function which packs four boundary conditions into one integer | ||||
|         """ | ||||
| @ -79,26 +95,18 @@ class BoundaryCondition(object): | ||||
|         bc = 0 | ||||
|         bc = bc | (self.north & 0x0000000F) << 24 | ||||
|         bc = bc | (self.south & 0x0000000F) << 16 | ||||
|         bc = bc | (self.east  & 0x0000000F) <<  8 | ||||
|         bc = bc | (self.west  & 0x0000000F) <<  0 | ||||
|          | ||||
|         #for t in types: | ||||
|         bc = bc | (self.east & 0x0000000F) << 8 | ||||
|         bc = bc | (self.west & 0x0000000F) << 0 | ||||
| 
 | ||||
|         # for t in types: | ||||
|         #    print("{0:s}, {1:d}, {1:032b}, {1:08b}".format(t, types[t])) | ||||
|         #print("bc: {0:032b}".format(bc)) | ||||
|          | ||||
|         # print("bc: {0:032b}".format(bc)) | ||||
| 
 | ||||
|         return np.int32(bc) | ||||
|          | ||||
|     def getTypes(bc): | ||||
|         types = {} | ||||
|         types['north'] = BoundaryCondition.Type((bc >> 24) & 0x0000000F) | ||||
|         types['south'] = BoundaryCondition.Type((bc >> 16) & 0x0000000F) | ||||
|         types['east']  = BoundaryCondition.Type((bc >>  8) & 0x0000000F) | ||||
|         types['west']  = BoundaryCondition.Type((bc >>  0) & 0x0000000F) | ||||
|         return types | ||||
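As a quick sanity check (editorial sketch, assuming GPUSimulators.Simulator is importable as laid out in this diff), the packing in as_coded_int and the module-level get_types are inverses of each other:

    from GPUSimulators.Simulator import BoundaryCondition, get_types

    types = {'north': BoundaryCondition.Type.Reflective,
             'south': BoundaryCondition.Type.Reflective,
             'east': BoundaryCondition.Type.Periodic,
             'west': BoundaryCondition.Type.Periodic}

    coded = BoundaryCondition(types).as_coded_int()  # one 4-bit field per side
    assert get_types(coded) == types                 # round trip recovers the dictionary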
| 
 | ||||
| 
 | ||||
| class BaseSimulator(object): | ||||
|     | ||||
| 
 | ||||
|     def __init__(self, | ||||
|                  context: CudaContext, | ||||
|                  nx: int, ny: int, | ||||
| @ -125,40 +133,40 @@ class BaseSimulator(object): | ||||
|             num_substeps: Number of substeps to perform for a full step | ||||
|         """ | ||||
| 
 | ||||
|         #Get logger | ||||
|         # Get logger | ||||
|         self.logger = logging.getLogger(__name__ + "." + self.__class__.__name__) | ||||
|          | ||||
|         #Save input parameters | ||||
|         #Notice that we need to specify them in the correct dataformat for the | ||||
|         #GPU kernel | ||||
| 
 | ||||
|         # Save input parameters | ||||
|         # Notice that we need to specify them in the correct dataformat for the | ||||
|         # GPU kernel | ||||
|         self.context = context | ||||
|         self.nx = np.int32(nx) | ||||
|         self.ny = np.int32(ny) | ||||
|         self.dx = np.float32(dx) | ||||
|         self.dy = np.float32(dy) | ||||
|         self.setBoundaryConditions(boundary_conditions) | ||||
|         self.set_boundary_conditions(boundary_conditions) | ||||
|         self.cfl_scale = cfl_scale | ||||
|         self.num_substeps = num_substeps | ||||
|          | ||||
|         #Handle autotuning block size | ||||
| 
 | ||||
|         # Handle autotuning block size | ||||
|         if self.context.autotuner: | ||||
|             peak_configuration = self.context.autotuner.get_peak_performance(self.__class__) | ||||
|             block_width = int(peak_configuration["block_width"]) | ||||
|             block_height = int(peak_configuration["block_height"]) | ||||
|             self.logger.debug("Used autotuning to get block size [%d x %d]", block_width, block_height) | ||||
|          | ||||
|         #Compute kernel launch parameters | ||||
|         self.block_size = (block_width, block_height, 1)  | ||||
|         self.grid_size = (  | ||||
|                        int(np.ceil(self.nx / float(self.block_size[0]))),  | ||||
|                        int(np.ceil(self.ny / float(self.block_size[1])))  | ||||
|                       ) | ||||
|          | ||||
|         #Create a CUDA stream | ||||
| 
 | ||||
|         # Compute kernel launch parameters | ||||
|         self.block_size = (block_width, block_height, 1) | ||||
|         self.grid_size = ( | ||||
|             int(np.ceil(self.nx / float(self.block_size[0]))), | ||||
|             int(np.ceil(self.ny / float(self.block_size[1]))) | ||||
|         ) | ||||
| 
 | ||||
|         # Create a CUDA stream | ||||
|         self.stream = cuda.Stream() | ||||
|         self.internal_stream = cuda.Stream() | ||||
|          | ||||
|         #Keep track of simulation time and number of timesteps | ||||
| 
 | ||||
|         # Keep track of simulation time and number of timesteps | ||||
|         self.t = 0.0 | ||||
|         self.nt = 0 | ||||
| 
 | ||||
| @ -171,41 +179,41 @@ class BaseSimulator(object): | ||||
|         Requires that the step() function is implemented in the subclasses | ||||
|         """ | ||||
| 
 | ||||
|         printer = Common.ProgressPrinter(t) | ||||
|          | ||||
|         t_start = self.simTime() | ||||
|         printer = ProgressPrinter(t) | ||||
| 
 | ||||
|         t_start = self.sim_time() | ||||
|         t_end = t_start + t | ||||
|          | ||||
| 
 | ||||
|         update_dt = True | ||||
|         if (dt is not None): | ||||
|         if dt is not None: | ||||
|             update_dt = False | ||||
|             self.dt = dt | ||||
|          | ||||
|         while(self.simTime() < t_end): | ||||
| 
 | ||||
|         while self.sim_time() < t_end: | ||||
|             # Update dt every 100 timesteps and cross your fingers it works | ||||
|             # for the next 100 | ||||
|             if (update_dt and (self.simSteps() % 100 == 0)): | ||||
|                 self.dt = self.computeDt()*self.cfl_scale | ||||
|          | ||||
|             if update_dt and (self.sim_steps() % 100 == 0): | ||||
|                 self.dt = self.compute_dt() * self.cfl_scale | ||||
| 
 | ||||
|             # Compute timestep for "this" iteration (i.e., shorten last timestep) | ||||
|             current_dt = np.float32(min(self.dt, t_end-self.simTime())) | ||||
|             current_dt = np.float32(min(self.dt, t_end - self.sim_time())) | ||||
| 
 | ||||
|             # Stop if end reached (should not happen) | ||||
|             if (current_dt <= 0.0): | ||||
|                 self.logger.warning("Timestep size {:d} is less than or equal to zero!".format(self.simSteps())) | ||||
|             if current_dt <= 0.0: | ||||
|                 self.logger.warning("Timestep size {:d} is less than or equal to zero!".format(self.sim_steps())) | ||||
|                 break | ||||
|          | ||||
| 
 | ||||
|             # Step forward in time | ||||
|             self.step(current_dt) | ||||
| 
 | ||||
|             #Print info | ||||
|             print_string = printer.getPrintString(self.simTime() - t_start) | ||||
|             if (print_string): | ||||
|             # Print info | ||||
|             print_string = printer.get_print_string(self.sim_time() - t_start) | ||||
|             if print_string: | ||||
|                 self.logger.info("%s: %s", self, print_string) | ||||
|                 try: | ||||
|                     self.check() | ||||
|                 except AssertionError as e: | ||||
|                     e.args += ("Step={:d}, time={:f}".format(self.simSteps(), self.simTime()),) | ||||
|                     e.args += ("Step={:d}, time={:f}".format(self.sim_steps(), self.sim_time()),) | ||||
|                     raise | ||||
| 
 | ||||
|     def step(self, dt: int): | ||||
| @ -218,57 +226,45 @@ class BaseSimulator(object): | ||||
| 
 | ||||
|         for i in range(self.num_substeps): | ||||
|             self.substep(dt, i) | ||||
|              | ||||
| 
 | ||||
|         self.t += dt | ||||
|         self.nt += 1 | ||||
| 
 | ||||
|     def download(self, variables=None): | ||||
|         return self.getOutput().download(self.stream, variables) | ||||
|          | ||||
|         return self.get_output().download(self.stream, variables) | ||||
| 
 | ||||
|     def synchronize(self): | ||||
|         self.stream.synchronize() | ||||
|          | ||||
|     def simTime(self): | ||||
| 
 | ||||
|     def sim_time(self): | ||||
|         return self.t | ||||
| 
 | ||||
|     def simSteps(self): | ||||
|     def sim_steps(self): | ||||
|         return self.nt | ||||
|         | ||||
|     def getExtent(self): | ||||
|         return [0, 0, self.nx*self.dx, self.ny*self.dy] | ||||
|          | ||||
|     def setBoundaryConditions(self, boundary_conditions): | ||||
| 
 | ||||
|     def get_extent(self): | ||||
|         return [0, 0, self.nx * self.dx, self.ny * self.dy] | ||||
| 
 | ||||
|     def set_boundary_conditions(self, boundary_conditions): | ||||
|         self.logger.debug("Boundary conditions set to {:s}".format(str(boundary_conditions))) | ||||
|         self.boundary_conditions = boundary_conditions.asCodedInt() | ||||
|          | ||||
|     def getBoundaryConditions(self): | ||||
|         return BoundaryCondition(BoundaryCondition.getTypes(self.boundary_conditions)) | ||||
|          | ||||
|         self.boundary_conditions = boundary_conditions.as_coded_int() | ||||
| 
 | ||||
|     def get_boundary_conditions(self): | ||||
|         return BoundaryCondition(get_types(self.boundary_conditions)) | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         """ | ||||
|         Function which performs one single substep with stepsize dt | ||||
|         """ | ||||
| 
 | ||||
|         raise(NotImplementedError("Needs to be implemented in subclass")) | ||||
|          | ||||
|     def getOutput(self): | ||||
|         raise(NotImplementedError("Needs to be implemented in subclass")) | ||||
|         raise (NotImplementedError("Needs to be implemented in subclass")) | ||||
| 
 | ||||
|     def get_output(self): | ||||
|         raise (NotImplementedError("Needs to be implemented in subclass")) | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.logger.warning("check() is not implemented - please implement") | ||||
|         #raise(NotImplementedError("Needs to be implemented in subclass")) | ||||
|          | ||||
|     def computeDt(self): | ||||
|         raise(NotImplementedError("Needs to be implemented in subclass")) | ||||
|         # raise(NotImplementedError("Needs to be implemented in subclass")) | ||||
| 
 | ||||
| 
 | ||||
| def stepOrderToCodedInt(step, order): | ||||
|     """ | ||||
|     Helper function which packs the step and order into a single integer | ||||
|     """ | ||||
| 
 | ||||
|     step_order = (step << 16) | (order & 0x0000ffff) | ||||
|     #print("Step:  {0:032b}".format(step)) | ||||
|     #print("Order: {0:032b}".format(order)) | ||||
|     #print("Mix:   {0:032b}".format(step_order)) | ||||
|     return np.int32(step_order) | ||||
|     def compute_dt(self): | ||||
|         raise (NotImplementedError("Needs to be implemented in subclass")) | ||||
|  | ||||
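To summarize the contract that BaseSimulator now drives, an illustrative sketch using the renamed snake_case API (the class name and method bodies are placeholders, not part of the commit):

    from GPUSimulators.Simulator import BaseSimulator

    class MySchemeSketch(BaseSimulator):
        def substep(self, dt, step_number):
            ...  # launch the CUDA kernel for one substep of size dt

        def get_output(self):
            ...  # return the ArakawaA2D holding the current solution

        def compute_dt(self):
            ...  # return a stable timestep, e.g. from a CFL reduction

        def check(self):
            ...  # optional sanity check of the GPU data

    # simulate(t) calls compute_dt() every 100 steps, substep() via step(),
    # and get_output() via download().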
| @ -20,30 +20,31 @@ You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| #Import packages we need | ||||
| from GPUSimulators import Simulator, Common | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| # Import packages we need | ||||
| import numpy as np | ||||
| 
 | ||||
| from pycuda import gpuarray | ||||
| 
 | ||||
| from GPUSimulators import Simulator | ||||
| from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| class WAF (Simulator.BaseSimulator): | ||||
| 
 | ||||
| class WAF(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the Weighted Average Flux (WAF) scheme | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self,  | ||||
|     def __init__(self, | ||||
|                  context, | ||||
|                  h0, hu0, hv0,  | ||||
|                  nx, ny,  | ||||
|                  dx, dy,  | ||||
|                  g,  | ||||
|                  h0, hu0, hv0, | ||||
|                  nx, ny, | ||||
|                  dx, dy, | ||||
|                  g, | ||||
|                  cfl_scale=0.9, | ||||
|                  boundary_conditions=BoundaryCondition(),  | ||||
|                  boundary_conditions=BoundaryCondition(), | ||||
|                  block_width=16, block_height=16, | ||||
|                  dt: float=None, | ||||
|                  compile_opts: list[str]=[]): | ||||
|                  dt: float = None, | ||||
|                  compile_opts: list[str] = []): | ||||
|         """ | ||||
|         Initialization routine | ||||
| 
 | ||||
| @ -59,79 +60,79 @@ class WAF (Simulator.BaseSimulator): | ||||
|             g: Gravitational acceleration (9.81 m/s^2) | ||||
|             compile_opts: Pass a list of nvcc compiler options | ||||
|         """ | ||||
|                   | ||||
|         # Call super constructor | ||||
|         super().__init__(context,  | ||||
|             nx, ny,  | ||||
|             dx, dy,  | ||||
|             boundary_conditions, | ||||
|             cfl_scale, | ||||
|             2, | ||||
|             block_width, block_height); | ||||
|         self.g = np.float32(g)  | ||||
| 
 | ||||
|         #Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_WAF.cu",  | ||||
|                                         defines={ | ||||
|                                             'BLOCK_WIDTH': self.block_size[0],  | ||||
|                                             'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                         },  | ||||
|                                         compile_args={ | ||||
|                                             'no_extern_c': True, | ||||
|                                             'options': ["--use_fast_math"] + compile_opts,  | ||||
|                                         },  | ||||
|                                         jit_compile_args={}) | ||||
|         # Call super constructor | ||||
|         super().__init__(context, | ||||
|                          nx, ny, | ||||
|                          dx, dy, | ||||
|                          boundary_conditions, | ||||
|                          cfl_scale, | ||||
|                          2, | ||||
|                          block_width, block_height) | ||||
|         self.g = np.float32(g) | ||||
| 
 | ||||
|         # Get kernels | ||||
|         module = context.get_module("cuda/SWE2D_WAF.cu", | ||||
|                                     defines={ | ||||
|                                         'BLOCK_WIDTH': self.block_size[0], | ||||
|                                         'BLOCK_HEIGHT': self.block_size[1] | ||||
|                                     }, | ||||
|                                     compile_args={ | ||||
|                                         'no_extern_c': True, | ||||
|                                         'options': ["--use_fast_math"] + compile_opts, | ||||
|                                     }, | ||||
|                                     jit_compile_args={}) | ||||
|         self.kernel = module.get_function("WAFKernel") | ||||
|         self.kernel.prepare("iiffffiiPiPiPiPiPiPiPiiii") | ||||
|      | ||||
|         #Create data by uploading to device | ||||
|         self.u0 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [h0, hu0, hv0]) | ||||
|         self.u1 = Common.ArakawaA2D(self.stream,  | ||||
|                         nx, ny,  | ||||
|                         2, 2,  | ||||
|                         [None, None, None]) | ||||
| 
 | ||||
|         # Create data by uploading to the device | ||||
|         self.u0 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [h0, hu0, hv0]) | ||||
|         self.u1 = ArakawaA2D(self.stream, | ||||
|                              nx, ny, | ||||
|                              2, 2, | ||||
|                              [None, None, None]) | ||||
|         self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32) | ||||
|          | ||||
|         if dt == None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0))) | ||||
| 
 | ||||
|         if dt is None: | ||||
|             dt_x = np.min(self.dx / (np.abs(hu0 / h0) + np.sqrt(g * h0))) | ||||
|             dt_y = np.min(self.dy / (np.abs(hv0 / h0) + np.sqrt(g * h0))) | ||||
|             self.dt = min(dt_x, dt_y) | ||||
|         else: | ||||
|             self.dt = dt | ||||
|          | ||||
| 
 | ||||
|         self.cfl_data.fill(self.dt, stream=self.stream) | ||||
|      | ||||
| 
 | ||||
|     def substep(self, dt, step_number): | ||||
|         self.substepDimsplit(dt*0.5, step_number) | ||||
|          | ||||
|     def substepDimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,  | ||||
|                 self.nx, self.ny,  | ||||
|                 self.dx, self.dy, dt,  | ||||
|                 self.g,  | ||||
|                 substep, | ||||
|                 self.boundary_conditions,  | ||||
|                 self.u0[0].data.gpudata, self.u0[0].data.strides[0],  | ||||
|                 self.u0[1].data.gpudata, self.u0[1].data.strides[0],  | ||||
|                 self.u0[2].data.gpudata, self.u0[2].data.strides[0],  | ||||
|                 self.u1[0].data.gpudata, self.u1[0].data.strides[0],  | ||||
|                 self.u1[1].data.gpudata, self.u1[1].data.strides[0],  | ||||
|                 self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                 self.cfl_data.gpudata, | ||||
|                 0, 0, | ||||
|                 self.nx, self.ny) | ||||
|         self.substep_dimsplit(dt * 0.5, step_number) | ||||
| 
 | ||||
|     def substep_dimsplit(self, dt, substep): | ||||
|         self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, | ||||
|                                         self.nx, self.ny, | ||||
|                                         self.dx, self.dy, dt, | ||||
|                                         self.g, | ||||
|                                         substep, | ||||
|                                         self.boundary_conditions, | ||||
|                                         self.u0[0].data.gpudata, self.u0[0].data.strides[0], | ||||
|                                         self.u0[1].data.gpudata, self.u0[1].data.strides[0], | ||||
|                                         self.u0[2].data.gpudata, self.u0[2].data.strides[0], | ||||
|                                         self.u1[0].data.gpudata, self.u1[0].data.strides[0], | ||||
|                                         self.u1[1].data.gpudata, self.u1[1].data.strides[0], | ||||
|                                         self.u1[2].data.gpudata, self.u1[2].data.strides[0], | ||||
|                                         self.cfl_data.gpudata, | ||||
|                                         0, 0, | ||||
|                                         self.nx, self.ny) | ||||
|         self.u0, self.u1 = self.u1, self.u0 | ||||
| 
 | ||||
|     def getOutput(self): | ||||
|     def get_output(self): | ||||
|         return self.u0 | ||||
|          | ||||
| 
 | ||||
|     def check(self): | ||||
|         self.u0.check() | ||||
|         self.u1.check() | ||||
|          | ||||
|     def computeDt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get(); | ||||
|         return max_dt*0.5 | ||||
| 
 | ||||
|     def compute_dt(self): | ||||
|         max_dt = gpuarray.min(self.cfl_data, stream=self.stream).get() | ||||
|         return max_dt * 0.5 | ||||
|  | ||||
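A minimal usage sketch of the renamed WAF interface (editorial; the domain size, initial data and context setup below are illustrative assumptions):

    import numpy as np
    from GPUSimulators import WAF
    from GPUSimulators.Simulator import BoundaryCondition
    from GPUSimulators.gpu import CudaContext

    context = CudaContext(device=0, autotuning=False)
    nx, ny, dx, dy, g = 128, 128, 1.0, 1.0, 9.81
    h0 = np.ones((ny, nx), dtype=np.float32)    # still water
    hu0 = np.zeros((ny, nx), dtype=np.float32)
    hv0 = np.zeros((ny, nx), dtype=np.float32)

    bc = BoundaryCondition({'north': BoundaryCondition.Type.Reflective,
                            'south': BoundaryCondition.Type.Reflective,
                            'east': BoundaryCondition.Type.Reflective,
                            'west': BoundaryCondition.Type.Reflective})
    sim = WAF.WAF(context, h0, hu0, hv0, nx, ny, dx, dy, g, boundary_conditions=bc)
    sim.simulate(1.0)            # advance one second of simulation time
    h, hu, hv = sim.download()   # copy the conserved variables back to the host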
							
								
								
									
GPUSimulators/common/__init__.py (new file, 9 lines added)
| @ -0,0 +1,9 @@ | ||||
| from .arkawa_2d import ArakawaA2D | ||||
| from .common import * | ||||
| from .cuda_array_2d import CudaArray2D | ||||
| from .cuda_array_3d import CudaArray3D | ||||
| from .data_dumper import DataDumper | ||||
| from .ip_engine import IPEngine | ||||
| from .popen_file_buffer import PopenFileBuffer | ||||
| from .progress_printer import ProgressPrinter | ||||
| from .timer import Timer | ||||
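With these re-exports in place, call sites replace the old "from GPUSimulators import Common" pattern with imports straight from the new package (illustrative):

    # Before the split:
    #   from GPUSimulators import Common
    #   u0 = Common.ArakawaA2D(...)
    # After the split:
    from GPUSimulators.common import ArakawaA2D, CudaArray2D, DataDumper, ProgressPrinter, Timer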
							
								
								
									
GPUSimulators/common/arkawa_2d.py (new file, 57 lines added)
| @ -0,0 +1,57 @@ | ||||
| import logging | ||||
| 
 | ||||
| import numpy as np | ||||
| import pycuda.gpuarray | ||||
| 
 | ||||
| from GPUSimulators.common.cuda_array_2d import CudaArray2D | ||||
| 
 | ||||
| 
 | ||||
| class ArakawaA2D: | ||||
|     """ | ||||
|     A class representing an Arakawa A type (unstaggered, logically Cartesian) grid | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables): | ||||
|         """ | ||||
|         Uploads initial data to the GPU device | ||||
|         """ | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|         self.gpu_variables = [] | ||||
|         for cpu_variable in cpu_variables: | ||||
|             self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)] | ||||
| 
 | ||||
|     def __getitem__(self, key): | ||||
|         if type(key) != int: | ||||
|             raise TypeError("Indexing is int based") | ||||
| 
 | ||||
|         if key >= len(self.gpu_variables) or key < 0: | ||||
|             raise IndexError("Out of bounds") | ||||
|         return self.gpu_variables[key] | ||||
| 
 | ||||
|     def download(self, stream, variables=None): | ||||
|         """ | ||||
|         Enables downloading data from the GPU device to Python | ||||
|         """ | ||||
|         if variables is None: | ||||
|             variables = range(len(self.gpu_variables)) | ||||
| 
 | ||||
|         cpu_variables = [] | ||||
|         for i in variables: | ||||
|             if i >= len(self.gpu_variables): | ||||
|                 raise IndexError(f"Variable {i} is out of range") | ||||
|             cpu_variables += [self.gpu_variables[i].download(stream, asynch=True)] | ||||
| 
 | ||||
|         # stream.synchronize() | ||||
|         return cpu_variables | ||||
| 
 | ||||
|     def check(self): | ||||
|         """ | ||||
|         Checks that data is still sane | ||||
|         """ | ||||
|         for i, gpu_variable in enumerate(self.gpu_variables): | ||||
|             var_sum = pycuda.gpuarray.sum(gpu_variable.data).get() | ||||
|             self.logger.debug(f"Data {i} with size [{gpu_variable.nx} x {gpu_variable.ny}] " | ||||
|                               + f"has average {var_sum / (gpu_variable.nx * gpu_variable.ny)}") | ||||
| 
 | ||||
|             if np.isnan(var_sum): | ||||
|                 raise ValueError("Data contains NaN values!") | ||||
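A short usage sketch (editorial; assumes a CUDA context is already initialised, here via pycuda.autoinit):

    import numpy as np
    import pycuda.autoinit  # noqa: F401 - creates a default CUDA context for this sketch
    import pycuda.driver as cuda
    from GPUSimulators.common import ArakawaA2D

    stream = cuda.Stream()
    nx, ny = 64, 64
    h = np.ones((ny, nx), dtype=np.float32)

    u = ArakawaA2D(stream, nx, ny, 2, 2, [h, None, None])  # two ghost cells per side
    u.check()                                    # raises ValueError on NaN data
    h_host, = u.download(stream, variables=[0])  # fetch only the first variable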
							
								
								
									
GPUSimulators/common/common.py (new file, 205 lines added)
| @ -0,0 +1,205 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| """ | ||||
| This python module implements the different helper functions and  | ||||
| classes | ||||
| 
 | ||||
| Copyright (C) 2018  SINTEF ICT | ||||
| 
 | ||||
| This program is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| import os | ||||
| 
 | ||||
| import numpy as np | ||||
| import time | ||||
| import subprocess | ||||
| import logging | ||||
| import json | ||||
| 
 | ||||
| from GPUSimulators.common.data_dumper import DataDumper | ||||
| from GPUSimulators.common.progress_printer import ProgressPrinter | ||||
| from GPUSimulators.common.timer import Timer | ||||
| 
 | ||||
| 
 | ||||
| def safe_call(cmd): | ||||
|     logger = logging.getLogger(__name__) | ||||
|     try: | ||||
|         #git rev-parse HEAD | ||||
|         current_dir = os.path.dirname(os.path.realpath(__file__)) | ||||
|         params = dict() | ||||
|         params['stderr'] = subprocess.STDOUT | ||||
|         params['cwd'] = current_dir | ||||
|         params['universal_newlines'] = True #text=True in more recent python | ||||
|         params['shell'] = False | ||||
|         if os.name == 'nt': | ||||
|             params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP | ||||
|         stdout = subprocess.check_output(cmd, **params) | ||||
|     except subprocess.CalledProcessError as e: | ||||
|         output = e.output | ||||
|         logger.error("Git failed, \nReturn code: " + str(e.returncode) + "\nOutput: " + output) | ||||
|         raise e | ||||
| 
 | ||||
|     return stdout | ||||
| 
 | ||||
| 
 | ||||
| def get_git_hash(): | ||||
|     return safe_call(["git", "rev-parse", "HEAD"]) | ||||
| 
 | ||||
| 
 | ||||
| def get_git_status(): | ||||
|     return safe_call(["git", "status", "--porcelain", "-uno"]) | ||||
| 
 | ||||
| 
 | ||||
| def to_json(in_dict, compressed=True): | ||||
|     """ | ||||
|     Creates JSON string from a dictionary | ||||
|     """ | ||||
| 
 | ||||
|     logger = logging.getLogger(__name__) | ||||
|     out_dict = in_dict.copy() | ||||
|     for key in out_dict: | ||||
|         if isinstance(out_dict[key], np.ndarray): | ||||
|             out_dict[key] = out_dict[key].tolist() | ||||
|         else: | ||||
|             try: | ||||
|                 json.dumps(out_dict[key]) | ||||
|             except: | ||||
|                 value = str(out_dict[key]) | ||||
|                 logger.warning("JSON: Converting {:s} to string ({:s})".format(key, value)) | ||||
|                 out_dict[key] = value | ||||
|     return json.dumps(out_dict) | ||||
| 
 | ||||
| 
 | ||||
| def run_simulation(simulator, simulator_args, outfile, save_times, save_var_names=[], dt=None): | ||||
|     """ | ||||
|     Runs a simulation and stores the output in a netCDF file. Stores the times given in | ||||
|     save_times, and saves all the variables in list save_var_names. Elements in | ||||
|     save_var_names can be set to None if you do not want to save them | ||||
|     """ | ||||
| 
 | ||||
|     profiling_data_sim_runner = { 'start': {}, 'end': {} } | ||||
|     profiling_data_sim_runner["start"]["t_sim_init"] = 0 | ||||
|     profiling_data_sim_runner["end"]["t_sim_init"] = 0 | ||||
|     profiling_data_sim_runner["start"]["t_nc_write"] = 0 | ||||
|     profiling_data_sim_runner["end"]["t_nc_write"] = 0 | ||||
|     profiling_data_sim_runner["start"]["t_full_step"] = 0 | ||||
|     profiling_data_sim_runner["end"]["t_full_step"] = 0 | ||||
| 
 | ||||
|     profiling_data_sim_runner["start"]["t_sim_init"] = time.time() | ||||
| 
 | ||||
|     logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|     if len(save_times) <= 0: | ||||
|         raise ValueError("Need to specify which times to save") | ||||
| 
 | ||||
|     with Timer("construct") as t: | ||||
|         sim = simulator(**simulator_args) | ||||
|     logger.info(f"Constructed in {str(t.secs)} seconds") | ||||
| 
 | ||||
|     #Create a netcdf file and simulate | ||||
|     with DataDumper(outfile, mode='w', clobber=False) as outdata: | ||||
|          | ||||
|         #Create attributes (metadata) | ||||
|         outdata.ncfile.created = time.ctime(time.time()) | ||||
|         outdata.ncfile.git_hash = get_git_hash() | ||||
|         outdata.ncfile.git_status = get_git_status() | ||||
|         outdata.ncfile.simulator = str(simulator) | ||||
|          | ||||
|         # do not write fields to attributes (they are too large) | ||||
|         simulator_args_for_ncfile = simulator_args.copy() | ||||
|         del simulator_args_for_ncfile["rho"] | ||||
|         del simulator_args_for_ncfile["rho_u"] | ||||
|         del simulator_args_for_ncfile["rho_v"] | ||||
|         del simulator_args_for_ncfile["E"] | ||||
|         outdata.ncfile.sim_args = to_json(simulator_args_for_ncfile) | ||||
|          | ||||
|         #Create dimensions | ||||
|         outdata.ncfile.createDimension('time', len(save_times)) | ||||
|         outdata.ncfile.createDimension('x', simulator_args['nx']) | ||||
|         outdata.ncfile.createDimension('y', simulator_args['ny']) | ||||
| 
 | ||||
|         #Create variables for dimensions | ||||
|         ncvars = {'time': outdata.ncfile.createVariable('time', np.dtype('float32').char, 'time'), | ||||
|                   'x': outdata.ncfile.createVariable('x', np.dtype('float32').char, 'x'), | ||||
|                   'y': outdata.ncfile.createVariable('y', np.dtype('float32').char, 'y')} | ||||
| 
 | ||||
|         #Fill variables with proper values | ||||
|         ncvars['time'][:] = save_times | ||||
|         extent = sim.get_extent() | ||||
|         ncvars['x'][:] = np.linspace(extent[0], extent[1], simulator_args['nx']) | ||||
|         ncvars['y'][:] = np.linspace(extent[2], extent[3], simulator_args['ny']) | ||||
|          | ||||
|         #Choose which variables to download (prune None from the list, but keep the index) | ||||
|         download_vars = [] | ||||
|         for i, var_name in enumerate(save_var_names): | ||||
|             if var_name is not None: | ||||
|                 download_vars += [i] | ||||
|         save_var_names = list(save_var_names[i] for i in download_vars) | ||||
|          | ||||
|         #Create variables | ||||
|         for var_name in save_var_names: | ||||
|             ncvars[var_name] = outdata.ncfile.createVariable( | ||||
|                 var_name, np.dtype('float32').char, ('time', 'y', 'x'), zlib=True, least_significant_digit=3) | ||||
| 
 | ||||
|         #Create step sizes between each save | ||||
|         t_steps = np.empty_like(save_times) | ||||
|         t_steps[0] = save_times[0] | ||||
|         t_steps[1:] = save_times[1:] - save_times[0:-1] | ||||
| 
 | ||||
|         profiling_data_sim_runner["end"]["t_sim_init"] = time.time() | ||||
| 
 | ||||
|         # Start simulation loop | ||||
|         progress_printer = ProgressPrinter(save_times[-1], print_every=10) | ||||
|         for k in range(len(save_times)): | ||||
|             # Get target time and step size there | ||||
|             t_step = t_steps[k] | ||||
|             t_end = save_times[k] | ||||
|              | ||||
|             # Sanity check simulator | ||||
|             try: | ||||
|                 sim.check() | ||||
|             except AssertionError as e: | ||||
|                 logger.error(f"Error after {sim.sim_steps()} steps (t={sim.sim_time()}): {str(e)}") | ||||
|                 return outdata.filename | ||||
| 
 | ||||
|             profiling_data_sim_runner["start"]["t_full_step"] += time.time() | ||||
| 
 | ||||
|             # Simulate | ||||
|             if t_step > 0.0: | ||||
|                 sim.simulate(t_step, dt) | ||||
| 
 | ||||
|             profiling_data_sim_runner["end"]["t_full_step"] += time.time() | ||||
| 
 | ||||
|             profiling_data_sim_runner["start"]["t_nc_write"] += time.time() | ||||
| 
 | ||||
|             #Download | ||||
|             save_vars = sim.download(download_vars) | ||||
|              | ||||
|             #Save to file | ||||
|             for i, var_name in enumerate(save_var_names): | ||||
|                 ncvars[var_name][k, :] = save_vars[i] | ||||
| 
 | ||||
|             profiling_data_sim_runner["end"]["t_nc_write"] += time.time() | ||||
| 
 | ||||
|             #Write progress to screen | ||||
|             print_string = progress_printer.get_print_string(t_end) | ||||
|             if print_string: | ||||
|                 logger.debug(print_string) | ||||
|                  | ||||
|         logger.debug(f"Simulated to t={t_end} in " | ||||
|                      + f"{sim.sim_steps()} timesteps (average dt={sim.sim_time() / sim.sim_steps()})") | ||||
| 
 | ||||
|     return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi | ||||
|      | ||||
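A hedged usage sketch of run_simulation (the simulator class, context and initial fields below are assumptions, not part of this diff); note that the function expects the Euler variables rho, rho_u, rho_v and E among simulator_args, since those keys are stripped before the arguments are stored as netCDF attributes:

    import numpy as np
    from GPUSimulators.common import run_simulation  # re-exported via common/__init__.py

    simulator_args = {
        'context': context,              # an existing CudaContext
        'nx': 256, 'ny': 256,
        'dx': 1.0, 'dy': 1.0,
        'rho': rho0, 'rho_u': rho_u0,    # hypothetical initial fields
        'rho_v': rho_v0, 'E': E0,
    }
    save_times = np.linspace(0.0, 1.0, 11)   # eleven snapshots between t=0 and t=1
    outfile, runner_profile, mpi_profile = run_simulation(
        MyEulerSimulator, simulator_args, "output.nc", save_times,
        save_var_names=['rho', 'rho_u', 'rho_v', 'E'])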
							
								
								
									
GPUSimulators/common/cuda_array_2d.py (new file, 139 lines added)
| @ -0,0 +1,139 @@ | ||||
| import logging | ||||
| 
 | ||||
| import numpy as np | ||||
| 
 | ||||
| import pycuda.gpuarray | ||||
| import pycuda.driver as cuda | ||||
| from pycuda.tools import PageLockedMemoryPool | ||||
| 
 | ||||
| 
 | ||||
| class CudaArray2D: | ||||
|     """ | ||||
|     Class that holds 2D CUDA data | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32): | ||||
|         """ | ||||
|         Uploads initial data to the CUDA device | ||||
|         """ | ||||
| 
 | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|         self.nx = nx | ||||
|         self.ny = ny | ||||
|         self.x_halo = x_halo | ||||
|         self.y_halo = y_halo | ||||
| 
 | ||||
|         nx_halo = nx + 2 * x_halo | ||||
|         ny_halo = ny + 2 * y_halo | ||||
| 
 | ||||
|         # self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny) | ||||
|         # Should perhaps use pycuda.driver.mem_alloc_data.pitch() here | ||||
|         self.data = pycuda.gpuarray.zeros((ny_halo, nx_halo), dtype) | ||||
| 
 | ||||
|         # For returning to download | ||||
|         self.memorypool = PageLockedMemoryPool() | ||||
| 
 | ||||
|         # If we don't have any data, just allocate and return | ||||
|         if cpu_data is None: | ||||
|             return | ||||
| 
 | ||||
|         # Make sure data is in proper format | ||||
|         if cpu_data.shape != (ny_halo, nx_halo) and cpu_data.shape != (self.ny, self.nx): | ||||
|             raise ValueError( | ||||
|                 f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.ny, self.nx))} / {str((ny_halo, nx_halo))}") | ||||
| 
 | ||||
|         if cpu_data.itemsize != 4: | ||||
|             raise ValueError("Wrong size of data type") | ||||
| 
 | ||||
|         if np.isfortran(cpu_data): | ||||
|             raise TypeError("Wrong datatype (Fortran, expected C)") | ||||
| 
 | ||||
|         # Create a copy object from host to device | ||||
|         x = (nx_halo - cpu_data.shape[1]) // 2 | ||||
|         y = (ny_halo - cpu_data.shape[0]) // 2 | ||||
|         self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]]) | ||||
|         # self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny) | ||||
| 
 | ||||
|     def __del__(self, *args): | ||||
|         # self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny) | ||||
|         self.data.gpudata.free() | ||||
|         self.data = None | ||||
| 
 | ||||
|     def download(self, stream, cpu_data=None, asynch=False, extent=None): | ||||
|         """ | ||||
|         Enables downloading data from GPU to Python | ||||
|         """ | ||||
| 
 | ||||
|         if extent is None: | ||||
|             x = self.x_halo | ||||
|             y = self.y_halo | ||||
|             nx = self.nx | ||||
|             ny = self.ny | ||||
|         else: | ||||
|             x, y, nx, ny = extent | ||||
| 
 | ||||
|         if cpu_data is None: | ||||
|             # self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny) | ||||
|             # Allocate host memory | ||||
|             # The following fails, don't know why (crashes python) | ||||
|             cpu_data = cuda.pagelocked_empty((int(ny), int(nx)), dtype=np.float32, | ||||
|                                              mem_flags=cuda.host_alloc_flags.PORTABLE) | ||||
|             # Non-pagelocked: cpu_data = np.empty((ny, nx), dtype=np.float32) | ||||
|             # cpu_data = self.memorypool.allocate((ny, nx), dtype=np.float32) | ||||
| 
 | ||||
|         assert nx == cpu_data.shape[1] | ||||
|         assert ny == cpu_data.shape[0] | ||||
|         assert x + nx <= self.nx + 2 * self.x_halo | ||||
|         assert y + ny <= self.ny + 2 * self.y_halo | ||||
| 
 | ||||
|         # Create a copy object from device to host | ||||
|         copy = cuda.Memcpy2D() | ||||
|         copy.set_src_device(self.data.gpudata) | ||||
|         copy.set_dst_host(cpu_data) | ||||
| 
 | ||||
|         # Set offsets and pitch of a source | ||||
|         copy.src_x_in_bytes = int(x) * self.data.strides[1] | ||||
|         copy.src_y = int(y) | ||||
|         copy.src_pitch = self.data.strides[0] | ||||
| 
 | ||||
|         # Set width in bytes to copy for each row and | ||||
|         # number of rows to copy | ||||
|         copy.width_in_bytes = int(nx) * cpu_data.itemsize | ||||
|         copy.height = int(ny) | ||||
| 
 | ||||
|         copy(stream) | ||||
|         if not asynch: | ||||
|             stream.synchronize() | ||||
| 
 | ||||
|         return cpu_data | ||||
| 
 | ||||
|     def upload(self, stream, cpu_data, extent=None): | ||||
|         if extent is None: | ||||
|             x = self.x_halo | ||||
|             y = self.y_halo | ||||
|             nx = self.nx | ||||
|             ny = self.ny | ||||
|         else: | ||||
|             x, y, nx, ny = extent | ||||
| 
 | ||||
|         assert (nx == cpu_data.shape[1]) | ||||
|         assert (ny == cpu_data.shape[0]) | ||||
|         assert (x + nx <= self.nx + 2 * self.x_halo) | ||||
|         assert (y + ny <= self.ny + 2 * self.y_halo) | ||||
| 
 | ||||
|         # Create a copy object from device to host | ||||
|         copy = cuda.Memcpy2D() | ||||
|         copy.set_dst_device(self.data.gpudata) | ||||
|         copy.set_src_host(cpu_data) | ||||
| 
 | ||||
|         # Set offsets and pitch of a source | ||||
|         copy.dst_x_in_bytes = int(x) * self.data.strides[1] | ||||
|         copy.dst_y = int(y) | ||||
|         copy.dst_pitch = self.data.strides[0] | ||||
| 
 | ||||
|         # Set width in bytes to copy for each row and | ||||
|         # number of rows to copy | ||||
|         copy.width_in_bytes = int(nx) * cpu_data.itemsize | ||||
|         copy.height = int(ny) | ||||
| 
 | ||||
|         copy(stream) | ||||
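A small upload/download round trip (editorial sketch, assuming an initialised CUDA context):

    import numpy as np
    import pycuda.autoinit  # noqa: F401 - default CUDA context for this sketch
    import pycuda.driver as cuda
    from GPUSimulators.common import CudaArray2D

    stream = cuda.Stream()
    nx, ny, halo = 32, 32, 2
    host = np.random.rand(ny, nx).astype(np.float32)

    arr = CudaArray2D(stream, nx, ny, halo, halo, host)  # uploads the interior cells
    result = arr.download(stream)                        # default extent excludes the halo
    assert np.allclose(result, host)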
							
								
								
									
GPUSimulators/common/cuda_array_3d.py (new file, 120 lines added)
| @ -0,0 +1,120 @@ | ||||
| import logging | ||||
| 
 | ||||
| import numpy as np | ||||
| import pycuda.gpuarray | ||||
| import pycuda.driver as cuda | ||||
| from pycuda.tools import PageLockedMemoryPool | ||||
| 
 | ||||
| 
 | ||||
| class CudaArray3D: | ||||
|     """ | ||||
|     Class that holds 3D data | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, stream, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None, dtype=np.float32): | ||||
|         """ | ||||
|         Uploads initial data to the CUDA device | ||||
|         """ | ||||
| 
 | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|         self.nx = nx | ||||
|         self.ny = ny | ||||
|         self.nz = nz | ||||
|         self.x_halo = x_halo | ||||
|         self.y_halo = y_halo | ||||
|         self.z_halo = z_halo | ||||
| 
 | ||||
|         nx_halo = nx + 2 * x_halo | ||||
|         ny_halo = ny + 2 * y_halo | ||||
|         nz_halo = nz + 2 * z_halo | ||||
| 
 | ||||
|         # self.logger.debug("Allocating [%dx%dx%d] buffer", self.nx, self.ny, self.nz) | ||||
|         # Should perhaps use pycuda.driver.mem_alloc_data.pitch() here | ||||
|         self.data = pycuda.gpuarray.zeros((nz_halo, ny_halo, nx_halo), dtype) | ||||
| 
 | ||||
|         # For returning to download | ||||
|         self.memorypool = PageLockedMemoryPool() | ||||
| 
 | ||||
|         # If we don't have any data, just allocate and return | ||||
|         if cpu_data is None: | ||||
|             return | ||||
| 
 | ||||
|         # Make sure data is in proper format | ||||
|         if (cpu_data.shape != (nz_halo, ny_halo, nx_halo) | ||||
|                 and cpu_data.shape != (self.nz, self.ny, self.nx)): | ||||
|             raise ValueError(f"Wrong shape of data {str(cpu_data.shape)} vs {str((self.nz, self.ny, self.nx))} / {str((nz_halo, ny_halo, nx_halo))}") | ||||
| 
 | ||||
|         if cpu_data.itemsize != 4: | ||||
|             raise ValueError("Wrong size of data type") | ||||
| 
 | ||||
|         if np.isfortran(cpu_data): | ||||
|             raise TypeError("Wrong datatype (Fortran, expected C)") | ||||
| 
 | ||||
|         # Create a copy object from host to device | ||||
|         copy = cuda.Memcpy3D() | ||||
|         copy.set_src_host(cpu_data) | ||||
|         copy.set_dst_device(self.data.gpudata) | ||||
| 
 | ||||
|         # Set offsets of destination | ||||
|         x_offset = (nx_halo - cpu_data.shape[2]) // 2 | ||||
|         y_offset = (ny_halo - cpu_data.shape[1]) // 2 | ||||
|         z_offset = (nz_halo - cpu_data.shape[0]) // 2 | ||||
|         copy.dst_x_in_bytes = x_offset * self.data.strides[2] | ||||
|         copy.dst_y = y_offset | ||||
|         copy.dst_z = z_offset | ||||
| 
 | ||||
|         # Set row pitch and slice height of the destination | ||||
|         copy.dst_pitch = self.data.strides[1] | ||||
|         copy.dst_height = ny_halo | ||||
|         # Set row pitch and slice height of the (contiguous) host source | ||||
|         copy.src_pitch = cpu_data.strides[1] | ||||
|         copy.src_height = cpu_data.shape[1] | ||||
| 
 | ||||
|         # Set width in bytes to copy for each row and | ||||
|         # number of rows to copy | ||||
|         width = max(self.nx, cpu_data.shape[2]) | ||||
|         height = max(self.ny, cpu_data.shape[1]) | ||||
|         depth = max(self.nz, cpu_data.shape[0]) | ||||
|         copy.width_in_bytes = width * cpu_data.itemsize | ||||
|         copy.height = height | ||||
|         copy.depth = depth | ||||
| 
 | ||||
|         # Perform the copy | ||||
|         copy(stream) | ||||
| 
 | ||||
|         # self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny) | ||||
| 
 | ||||
|     def __del__(self, *args): | ||||
|         # self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny) | ||||
|         self.data.gpudata.free() | ||||
|         self.data = None | ||||
| 
 | ||||
|     def download(self, stream, asynch=False): | ||||
|         """ | ||||
|         Enables downloading data from GPU to Python | ||||
|         """ | ||||
| 
 | ||||
|         # self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny) | ||||
|         # Allocate host memory | ||||
|         # cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32) | ||||
|         # cpu_data = np.empty((self.nz, self.ny, self.nx), dtype=np.float32) | ||||
|         cpu_data = self.memorypool.allocate((self.nz, self.ny, self.nx), dtype=np.float32) | ||||
| 
 | ||||
|         # Create a copy object from device to host | ||||
|         copy = cuda.Memcpy3D() | ||||
|         copy.set_src_device(self.data.gpudata) | ||||
|         copy.set_dst_host(cpu_data) | ||||
| 
 | ||||
|         # Set offsets, row pitch and slice height of the source | ||||
|         copy.src_x_in_bytes = self.x_halo * self.data.strides[2] | ||||
|         copy.src_y = self.y_halo | ||||
|         copy.src_z = self.z_halo | ||||
|         copy.src_pitch = self.data.strides[1] | ||||
|         copy.src_height = self.ny + 2 * self.y_halo | ||||
| 
 | ||||
|         # Set pitch and slice height of the (contiguous) host destination, | ||||
|         # plus width in bytes, number of rows and number of slices to copy | ||||
|         copy.dst_pitch = self.nx * cpu_data.itemsize | ||||
|         copy.dst_height = self.ny | ||||
|         copy.width_in_bytes = self.nx * cpu_data.itemsize | ||||
|         copy.height = self.ny | ||||
|         copy.depth = self.nz | ||||
| 
 | ||||
|         copy(stream) | ||||
|         if not asynch: | ||||
|             stream.synchronize() | ||||
| 
 | ||||
|         return cpu_data | ||||
							
								
								
									
GPUSimulators/common/data_dumper.py (new file, 79 lines added)
| @ -0,0 +1,79 @@ | ||||
| import json | ||||
| import logging | ||||
| import os | ||||
| 
 | ||||
| import netCDF4 | ||||
| import numpy as np | ||||
| 
 | ||||
| 
 | ||||
| def to_json(in_dict): | ||||
|     out_dict = in_dict.copy() | ||||
| 
 | ||||
|     for key in out_dict: | ||||
|         if isinstance(out_dict[key], np.ndarray): | ||||
|             out_dict[key] = out_dict[key].tolist() | ||||
|         else: | ||||
|             try: | ||||
|                 json.dumps(out_dict[key]) | ||||
|             except: | ||||
|                 out_dict[key] = str(out_dict[key]) | ||||
| 
 | ||||
|     return json.dumps(out_dict) | ||||
| 
 | ||||
| 
 | ||||
| class DataDumper(object): | ||||
|     """ | ||||
|     Simple class for holding a netCDF4 object | ||||
|     (handles opening and closing nicely) | ||||
|     Use as | ||||
|     with DataDumper("filename") as data: | ||||
|         ... | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, filename, *args, **kwargs): | ||||
|         self.logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|         # Create directory if needed | ||||
|         filename = os.path.abspath(filename) | ||||
|         dirname = os.path.dirname(filename) | ||||
|         if dirname and not os.path.isdir(dirname): | ||||
|             self.logger.info("Creating directory " + dirname) | ||||
|             os.makedirs(dirname) | ||||
| 
 | ||||
|         # Get mode of a file if we have that | ||||
|         mode = None | ||||
|         if args: | ||||
|             mode = args[0] | ||||
|         elif kwargs and 'mode' in kwargs.keys(): | ||||
|             mode = kwargs['mode'] | ||||
| 
 | ||||
|         # Create a new unique file if writing | ||||
|         if mode: | ||||
|             if ("w" in mode) or ("+" in mode) or ("a" in mode): | ||||
|                 i = 0 | ||||
|                 stem, ext = os.path.splitext(filename) | ||||
|                 while os.path.isfile(filename): | ||||
|                     filename = f"{stem}_{str(i).zfill(4)}{ext}" | ||||
|                     i = i + 1 | ||||
|         self.filename = os.path.abspath(filename) | ||||
| 
 | ||||
|         # Save arguments | ||||
|         self.args = args | ||||
|         self.kwargs = kwargs | ||||
| 
 | ||||
|         # Log output | ||||
|         self.logger.info("Initialized " + self.filename) | ||||
| 
 | ||||
|     def __enter__(self): | ||||
|         self.logger.info("Opening " + self.filename) | ||||
|         if self.args: | ||||
|             self.logger.info("Arguments: " + str(self.args)) | ||||
|         if self.kwargs: | ||||
|             self.logger.info("Keyword arguments: " + str(self.kwargs)) | ||||
|         self.ncfile = netCDF4.Dataset(self.filename, *self.args, **self.kwargs) | ||||
|         return self | ||||
| 
 | ||||
|     def __exit__(self, *args): | ||||
|         self.logger.info("Closing " + self.filename) | ||||
|         self.ncfile.close() | ||||
| 
 | ||||
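For example (a minimal sketch; the file name and variable are illustrative):

    import numpy as np
    from GPUSimulators.common import DataDumper

    with DataDumper("results/output.nc", mode='w') as outdata:
        outdata.ncfile.createDimension('x', 16)
        x_var = outdata.ncfile.createVariable('x', np.dtype('float32').char, 'x')
        x_var[:] = np.linspace(0.0, 1.0, 16)
    # The dataset is closed on exit; an existing file is never overwritten,
    # a unique name such as output_0000.nc is chosen instead.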
							
								
								
									
GPUSimulators/common/ip_engine.py (new file, 101 lines added)
| @ -0,0 +1,101 @@ | ||||
| import gc | ||||
| import logging | ||||
| import os | ||||
| import signal | ||||
| import subprocess | ||||
| import time | ||||
| 
 | ||||
| from GPUSimulators.common.popen_file_buffer import PopenFileBuffer | ||||
| 
 | ||||
| 
 | ||||
| class IPEngine(object): | ||||
|     """ | ||||
|     Class for starting IPEngines for MPI processing in IPython | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, n_engines): | ||||
|         self.logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|         # Start ipcontroller | ||||
|         self.logger.info("Starting IPController") | ||||
|         self.c_buff = PopenFileBuffer() | ||||
|         c_cmd = ["ipcontroller", "--ip='*'"] | ||||
|         c_params = dict() | ||||
|         c_params['stderr'] = self.c_buff.stderr | ||||
|         c_params['stdout'] = self.c_buff.stdout | ||||
|         c_params['shell'] = False | ||||
|         if os.name == 'nt': | ||||
|             c_params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP | ||||
|         self.c = subprocess.Popen(c_cmd, **c_params) | ||||
| 
 | ||||
|         # Wait until the controller is running | ||||
|         time.sleep(3) | ||||
| 
 | ||||
|         # Start engines | ||||
|         self.logger.info("Starting IPEngines") | ||||
|         self.e_buff = PopenFileBuffer() | ||||
|         e_cmd = ["mpiexec", "-n", str(n_engines), "ipengine", "--mpi"] | ||||
|         e_params = dict() | ||||
|         e_params['stderr'] = self.e_buff.stderr | ||||
|         e_params['stdout'] = self.e_buff.stdout | ||||
|         e_params['shell'] = False | ||||
|         if os.name == 'nt': | ||||
|             e_params['creationflags'] = subprocess.CREATE_NEW_PROCESS_GROUP | ||||
|         self.e = subprocess.Popen(e_cmd, **e_params) | ||||
| 
 | ||||
|         # attach to a running cluster | ||||
|         import ipyparallel | ||||
|         self.cluster = ipyparallel.Client()  # profile='mpi') | ||||
|         time.sleep(3) | ||||
|         while len(self.cluster.ids) != n_engines: | ||||
|             time.sleep(0.5) | ||||
|             self.logger.info("Waiting for cluster...") | ||||
|             self.cluster = ipyparallel.Client()  # profile='mpi') | ||||
| 
 | ||||
|         self.logger.info("Done") | ||||
| 
 | ||||
|     def __del__(self): | ||||
|         self.shutdown() | ||||
| 
 | ||||
|     def shutdown(self): | ||||
|         if self.e is not None: | ||||
|             if os.name == 'nt': | ||||
|                 self.logger.warning("Sending CTRL+C to IPEngine") | ||||
|                 self.e.send_signal(signal.CTRL_C_EVENT) | ||||
| 
 | ||||
|             try: | ||||
|                 self.e.communicate(timeout=3) | ||||
|                 self.e.kill() | ||||
|             except subprocess.TimeoutExpired: | ||||
|                 self.logger.warning("Killing IPEngine") | ||||
|                 self.e.kill() | ||||
|                 self.e.communicate() | ||||
|             self.e = None | ||||
| 
 | ||||
|             cout, cerr = self.e_buff.read() | ||||
|             self.logger.info(f"IPEngine cout: {cout}") | ||||
|             self.logger.info(f"IPEngine cerr: {cerr}") | ||||
|             self.e_buff = None | ||||
| 
 | ||||
|             gc.collect() | ||||
| 
 | ||||
|         if self.c is not None: | ||||
|             if os.name == 'nt': | ||||
|                 self.logger.warning("Sending CTRL+C to IPController") | ||||
|                 self.c.send_signal(signal.CTRL_C_EVENT) | ||||
| 
 | ||||
|             try: | ||||
|                 self.c.communicate(timeout=3) | ||||
|                 self.c.kill() | ||||
|             except subprocess.TimeoutExpired: | ||||
|                 self.logger.warning("Killing IPController") | ||||
|                 self.c.kill() | ||||
|                 self.c.communicate() | ||||
|             self.c = None | ||||
| 
 | ||||
|             cout, cerr = self.c_buff.read() | ||||
|             self.logger.info(f"IPController cout: {cout}") | ||||
|             self.logger.info(f"IPController cerr: {cerr}") | ||||
|             self.c_buff = None | ||||
| 
 | ||||
|             gc.collect() | ||||
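For context, a minimal sketch of how this controller/engine helper is typically driven from a notebook or test. The class name IPEngine, its module path, and the single n_engines constructor argument are not visible in this hunk and are assumptions; the sketch only mirrors what __init__ and shutdown() above do.

# Hypothetical usage sketch; class name, import path and constructor signature are assumed.
from GPUSimulators.common import IPEngine

engine_pool = IPEngine(4)                 # starts ipcontroller plus 4 MPI-backed ipengines
try:
    view = engine_pool.cluster[:]         # ipyparallel DirectView over all connected engines
    print(view.apply_sync(lambda: "engine alive"))
finally:
    engine_pool.shutdown()                # sends CTRL+C on Windows, then terminates both processes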
							
								
								
									
GPUSimulators/common/popen_file_buffer.py (new file, 27 lines)
							| @ -0,0 +1,27 @@ | ||||
| import tempfile | ||||
| 
 | ||||
| 
 | ||||
| class PopenFileBuffer(object): | ||||
|     """ | ||||
|     Simple class for holding a set of temp files | ||||
|     for communicating with a subprocess | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self): | ||||
|         self.stdout = tempfile.TemporaryFile(mode='w+t') | ||||
|         self.stderr = tempfile.TemporaryFile(mode='w+t') | ||||
| 
 | ||||
|     def __del__(self): | ||||
|         self.stdout.close() | ||||
|         self.stderr.close() | ||||
| 
 | ||||
|     def read(self): | ||||
|         self.stdout.seek(0) | ||||
|         cout = self.stdout.read() | ||||
|         self.stdout.seek(0, 2) | ||||
| 
 | ||||
|         self.stderr.seek(0) | ||||
|         cerr = self.stderr.read() | ||||
|         self.stderr.seek(0, 2) | ||||
| 
 | ||||
|         return cout, cerr | ||||
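As a usage note, the buffer above is meant to be handed to subprocess.Popen as its stdout/stderr targets and read back later, which is how the controller/engine processes earlier in this commit use it. A small self-contained sketch, assuming only the file location shown in this diff (the echo command is purely illustrative):

import subprocess

from GPUSimulators.common.popen_file_buffer import PopenFileBuffer

buff = PopenFileBuffer()
# Redirect the child's output into the buffer's temporary files.
proc = subprocess.Popen(["echo", "hello world"],
                        stdout=buff.stdout, stderr=buff.stderr, shell=False)
proc.communicate()

# read() rewinds the temp files, returns their contents, and seeks back to the end
# so that any later child output is appended after what has already been read.
cout, cerr = buff.read()
print("stdout:", cout.strip())
print("stderr:", cerr.strip())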
							
								
								
									
GPUSimulators/common/progress_printer.py (new file, 62 lines)
							| @ -0,0 +1,62 @@ | ||||
| import logging | ||||
| import time | ||||
| 
 | ||||
| import numpy as np | ||||
| 
 | ||||
| 
 | ||||
| def time_string(seconds): | ||||
|     seconds = int(max(seconds, 1)) | ||||
|     minutes, seconds = divmod(seconds, 60) | ||||
|     hours, minutes = divmod(minutes, 60) | ||||
|     periods = [('h', hours), ('m', minutes), ('s', seconds)] | ||||
|     return_string = ' '.join('{}{}'.format(value, name) | ||||
|                              for name, value in periods | ||||
|                              if value) | ||||
|     return return_string | ||||
| 
 | ||||
| 
 | ||||
| def progress_bar(step, total_steps, width=30): | ||||
|     progress = np.round(width * step / total_steps).astype(np.int32) | ||||
|     progressbar = "0% [" + "#" * progress + "=" * (width - progress) + "] 100%" | ||||
|     return progressbar | ||||
| 
 | ||||
| 
 | ||||
| class ProgressPrinter(object): | ||||
|     """ | ||||
|     Small helper class for creating a progress bar | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, total_steps, print_every=5): | ||||
|         self.logger = logging.getLogger(__name__) | ||||
|         self.start = time.time() | ||||
|         self.total_steps = total_steps | ||||
|         self.print_every = print_every | ||||
|         self.next_print_time = self.print_every | ||||
|         self.last_step = 0 | ||||
|         self.secs_per_iter = None | ||||
| 
 | ||||
|     def get_print_string(self, step): | ||||
|         elapsed = time.time() - self.start | ||||
|         if elapsed > self.next_print_time: | ||||
|             dt = elapsed - (self.next_print_time - self.print_every) | ||||
|             dsteps = step - self.last_step | ||||
|             steps_remaining = self.total_steps - step | ||||
| 
 | ||||
|             if dsteps == 0: | ||||
|                 return None | ||||
| 
 | ||||
|             self.last_step = step | ||||
|             self.next_print_time = elapsed + self.print_every | ||||
| 
 | ||||
|             if not self.secs_per_iter: | ||||
|                 self.secs_per_iter = dt / dsteps | ||||
|             self.secs_per_iter = 0.2 * self.secs_per_iter + 0.8 * (dt / dsteps) | ||||
| 
 | ||||
|             remaining_time = steps_remaining * self.secs_per_iter | ||||
| 
 | ||||
|             return (f"{progress_bar(step, self.total_steps)}. " | ||||
|                     + f"Total: {time_string(elapsed + remaining_time)}, " | ||||
|                     + f"elapsed: {time_string(elapsed)}, " | ||||
|                     + f"remaining: {time_string(remaining_time)}") | ||||
| 
 | ||||
|         return None | ||||
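A short sketch of wiring ProgressPrinter into a time-stepping loop: get_print_string() returns None until print_every seconds have passed, so it is cheap to call on every iteration. The sleep below stands in for a real simulation step and the import path follows the new file location in this diff.

import time

from GPUSimulators.common.progress_printer import ProgressPrinter

total_steps = 500
printer = ProgressPrinter(total_steps, print_every=1)   # report roughly once per second

for step in range(total_steps):
    time.sleep(0.01)                                    # stand-in for one simulation step
    status = printer.get_print_string(step)
    if status is not None:                              # None between reports
        print(status)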
							
								
								
									
GPUSimulators/common/timer.py (new file, 26 lines)
							| @ -0,0 +1,26 @@ | ||||
| import logging | ||||
| import time | ||||
| 
 | ||||
| 
 | ||||
| class Timer(object): | ||||
|     """ | ||||
|     Class which keeps track of time spent for a section of code | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self, tag, log_level=logging.DEBUG): | ||||
|         self.tag = tag | ||||
|         self.log_level = log_level | ||||
|         self.logger = logging.getLogger(__name__) | ||||
| 
 | ||||
|     def __enter__(self): | ||||
|         self.start = time.time() | ||||
|         return self | ||||
| 
 | ||||
|     def __exit__(self, *args): | ||||
|         self.end = time.time() | ||||
|         self.secs = self.end - self.start | ||||
|         self.msecs = self.secs * 1000  # milliseconds | ||||
|         self.logger.log(self.log_level, f"{self.tag}: {self.msecs} ms") | ||||
| 
 | ||||
|     def elapsed(self): | ||||
|         return time.time() - self.start | ||||
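Timer is a context manager, matching the with Common.Timer("construct") pattern seen in the notebook traceback below. A minimal sketch, importing from the new module path shown in this diff; the logging setup is only there so the DEBUG-level message becomes visible:

import logging
import time

from GPUSimulators.common.timer import Timer

logging.basicConfig(level=logging.DEBUG)    # Timer logs the elapsed time at DEBUG by default

with Timer("example_section") as t:
    time.sleep(0.25)                        # stand-in for real work
    print(f"elapsed so far: {t.elapsed():.3f} s")

print(f"total: {t.secs:.3f} s ({t.msecs:.1f} ms)")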
| @ -0,0 +1,2 @@ | ||||
| from .cuda_context import CudaContext | ||||
| from .hip_context import HIPContext | ||||
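With the package-level re-exports above, callers can import a GPU context directly from GPUSimulators.gpu. A hedged sketch; which backend is available depends on the machine, and the no-argument constructor is an assumption (sensible defaults are presumed):

# Sketch only: pick whichever backend the machine supports.
from GPUSimulators.gpu import CudaContext   # re-exported from gpu/cuda_context.py
# from GPUSimulators.gpu import HIPContext  # re-exported from gpu/hip_context.py

context = CudaContext()                     # constructor arguments assumed to default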
| @ -21,8 +21,6 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| 
 | ||||
| import os | ||||
| 
 | ||||
| import numpy as np | ||||
| import time | ||||
| import re | ||||
| import io | ||||
| import hashlib | ||||
| @ -33,8 +31,8 @@ import pycuda.compiler as cuda_compiler | ||||
| import pycuda.gpuarray | ||||
| import pycuda.driver as cuda | ||||
| 
 | ||||
| from GPUSimulators import Autotuner, Common | ||||
| from GPUSimulators.gpu.Context import Context | ||||
| from GPUSimulators import Autotuner | ||||
| from GPUSimulators.common import common | ||||
| 
 | ||||
| 
 | ||||
| class CudaContext(object): | ||||
| @ -3,10 +3,10 @@ import io | ||||
| import os.path | ||||
| 
 | ||||
| import hip as hip_main | ||||
| from hip import hip, hiprtc | ||||
| from hip import hip | ||||
| 
 | ||||
| from GPUSimulators import Common | ||||
| from GPUSimulators.gpu.Context import Context | ||||
| from GPUSimulators.common import common | ||||
| from GPUSimulators.gpu.context import Context | ||||
| 
 | ||||
| 
 | ||||
| class HIPContext(Context): | ||||
| @ -52,9 +52,7 @@ | ||||
|    "execution_count": 1, | ||||
|    "metadata": {}, | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "from GPUSimulators import IPythonMagic" | ||||
|    ] | ||||
|    "source": "" | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
| @ -115,10 +113,10 @@ | ||||
|     "import numpy as np\n", | ||||
|     "from matplotlib import pyplot as plt\n", | ||||
|     "from mpi4py import MPI\n", | ||||
|     "import time\n", | ||||
|     "import json\n", | ||||
|     "\n", | ||||
|     "from GPUSimulators import IPythonMagic, MPISimulator, Common" | ||||
|     "from GPUSimulators import MPISimulator\n", | ||||
|     "from GPUSimulators.common import common" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @ -317,7 +315,6 @@ | ||||
|     "%%px\n", | ||||
|     "\n", | ||||
|     "from GPUSimulators.helpers import InitialConditions\n", | ||||
|     "from GPUSimulators.Simulator import BoundaryCondition\n", | ||||
|     "\n", | ||||
|     "my_context.autotuner = None\n", | ||||
|     "\n", | ||||
| @ -348,7 +345,7 @@ | ||||
|     "    return sim\n", | ||||
|     "\n", | ||||
|     "\n", | ||||
|     "outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names)" | ||||
|     "outfile = Common.run_simulation(genSim, arguments, outfile, save_times, save_var_names)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
| @ -657,7 +654,7 @@ | ||||
|     "    sim = MPISimulator.MPISimulator(local_sim, grid)\n", | ||||
|     "    return sim\n", | ||||
|     "\n", | ||||
|     "outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names)" | ||||
|     "outfile = Common.run_simulation(genSim, arguments, outfile, save_times, save_var_names)" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|  | ||||
| @ -13,19 +13,10 @@ | ||||
|     "%load_ext line_profiler\n", | ||||
|     "\n", | ||||
|     "#Import packages we need\n", | ||||
|     "import numpy as np\n", | ||||
|     "from matplotlib import animation, rc\n", | ||||
|     "from matplotlib import pyplot as plt\n", | ||||
|     "\n", | ||||
|     "import subprocess\n", | ||||
|     "import os\n", | ||||
|     "import gc\n", | ||||
|     "import datetime\n", | ||||
|     "import importlib\n", | ||||
|     "import logging\n", | ||||
|     "\n", | ||||
|     "import pycuda.driver as cuda\n", | ||||
|     "import pycuda.compiler\n", | ||||
|     "\n", | ||||
|     "try:\n", | ||||
|     "    from StringIO import StringIO\n", | ||||
| @ -37,7 +28,7 @@ | ||||
|     "rc('figure', figsize=(16.0, 12.0))\n", | ||||
|     "rc('animation', html='html5')\n", | ||||
|     "\n", | ||||
|     "from GPUSimulators import Common, IPythonMagic\n", | ||||
|     "from GPUSimulators.common import common\n", | ||||
|     "from GPUSimulators.helpers import InitialConditions" | ||||
|    ] | ||||
|   }, | ||||
| @ -129,7 +120,7 @@ | ||||
|     "    h = sim.u0[0].download(sim.stream)\n", | ||||
|     "    \n", | ||||
|     "    plt.figure()\n", | ||||
|     "    plt.title(str(sim) + \", t=\" + str(sim.simTime()) + \", nt=\" + str(sim.simSteps()))\n", | ||||
|     "    plt.title(str(sim) + \", t=\" + str(sim.sim_time()) + \", nt=\" + str(sim.sim_steps()))\n", | ||||
|     "    extent = [0, sim.dx*sim.nx, 0, sim.dy*sim.ny]\n", | ||||
|     "    plt.imshow(h, vmin=0.49, vmax=0.52, extent=extent)\n", | ||||
|     "    plt.colorbar()" | ||||
| @ -292,16 +283,16 @@ | ||||
|      "evalue": "", | ||||
|      "output_type": "error", | ||||
|      "traceback": [ | ||||
|       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||||
|       "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)", | ||||
|       "Cell \u001b[0;32mIn[10], line 5\u001b[0m\n\u001b[1;32m      2\u001b[0m importlib\u001b[38;5;241m.\u001b[39mreload(KP07)\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Common\u001b[38;5;241m.\u001b[39mTimer(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mconstruct\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m t:\n\u001b[0;32m----> 5\u001b[0m     sim \u001b[38;5;241m=\u001b[39m \u001b[43mKP07\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mKP07\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43marguments\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m Common\u001b[38;5;241m.\u001b[39mTimer(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstep\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m t:\n\u001b[1;32m      8\u001b[0m     t \u001b[38;5;241m=\u001b[39m sim\u001b[38;5;241m.\u001b[39msimulate(t_end)\n", | ||||
|       "File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/KP07.py:70\u001b[0m, in \u001b[0;36mKP07.__init__\u001b[0;34m(self, context, h0, hu0, hv0, nx, ny, dx, dy, g, theta, cfl_scale, order, boundary_conditions, block_width, block_height, dt, compile_opts)\u001b[0m\n\u001b[1;32m     53\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     54\u001b[0m \u001b[38;5;124;03mInitialization routine\u001b[39;00m\n\u001b[1;32m     55\u001b[0m \u001b[38;5;124;03m\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     66\u001b[0m \u001b[38;5;124;03m    compile_opts: Pass a list of nvcc compiler options\u001b[39;00m\n\u001b[1;32m     67\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     69\u001b[0m \u001b[38;5;66;03m# Call super constructor\u001b[39;00m\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcontext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     71\u001b[0m \u001b[43m    \u001b[49m\u001b[43mnx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     72\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m     73\u001b[0m \u001b[43m    \u001b[49m\u001b[43mboundary_conditions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     74\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcfl_scale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     75\u001b[0m \u001b[43m    \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     76\u001b[0m \u001b[43m    \u001b[49m\u001b[43mblock_width\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock_height\u001b[49m\u001b[43m)\u001b[49m;\n\u001b[1;32m     77\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mg \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mfloat32(g)             \n\u001b[1;32m     78\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtheta \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mfloat32(theta) \n", | ||||
|       "File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Simulator.py:146\u001b[0m, in \u001b[0;36mBaseSimulator.__init__\u001b[0;34m(self, context, nx, ny, dx, dy, boundary_conditions, cfl_scale, num_substeps, block_width, block_height)\u001b[0m\n\u001b[1;32m    144\u001b[0m \u001b[38;5;66;03m#Handle autotuning block size\u001b[39;00m\n\u001b[1;32m    145\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontext\u001b[38;5;241m.\u001b[39mautotuner:\n\u001b[0;32m--> 146\u001b[0m     peak_configuration \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcontext\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautotuner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_peak_performance\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;18;43m__class__\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    147\u001b[0m     block_width \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(peak_configuration[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m    148\u001b[0m     block_height \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mint\u001b[39m(peak_configuration[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mblock_height\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n", | ||||
|       "File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:121\u001b[0m, in \u001b[0;36mAutotuner.get_peak_performance\u001b[0;34m(self, simulator)\u001b[0m\n\u001b[1;32m    119\u001b[0m     logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCould not get autotuned peak performance for \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m: benchmarking\u001b[39m\u001b[38;5;124m\"\u001b[39m, key)\n\u001b[1;32m    120\u001b[0m     data\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m--> 121\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbenchmark\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimulator\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    122\u001b[0m     data \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfilename)\n\u001b[1;32m    124\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfind_max_index\u001b[39m(megacells):\n", | ||||
|       "File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:84\u001b[0m, in \u001b[0;36mAutotuner.benchmark\u001b[0;34m(self, simulator, force)\u001b[0m\n\u001b[1;32m     81\u001b[0m             benchmark_data[k] \u001b[38;5;241m=\u001b[39m v\n\u001b[1;32m     83\u001b[0m \u001b[38;5;66;03m# Run benchmark\u001b[39;00m\n\u001b[0;32m---> 84\u001b[0m benchmark_data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_megacells\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mAutotuner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbenchmark_single_simulator\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimulator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43marguments\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblock_widths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblock_heights\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     85\u001b[0m benchmark_data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_block_widths\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblock_widths\n\u001b[1;32m     86\u001b[0m benchmark_data[key \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_block_heights\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mblock_heights\n", | ||||
|       "File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:162\u001b[0m, in \u001b[0;36mAutotuner.benchmark_single_simulator\u001b[0;34m(simulator, arguments, block_widths, block_heights)\u001b[0m\n\u001b[1;32m    160\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m i, block_width \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(block_widths):\n\u001b[1;32m    161\u001b[0m             sim_arguments\u001b[38;5;241m.\u001b[39mupdate({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mblock_width\u001b[39m\u001b[38;5;124m'\u001b[39m: block_width})\n\u001b[0;32m--> 162\u001b[0m             megacells[j, i] \u001b[38;5;241m=\u001b[39m \u001b[43mAutotuner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_benchmark\u001b[49m\u001b[43m(\u001b[49m\u001b[43msimulator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msim_arguments\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    165\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCompleted \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m in \u001b[39m\u001b[38;5;132;01m%f\u001b[39;00m\u001b[38;5;124m seconds\u001b[39m\u001b[38;5;124m\"\u001b[39m, simulator\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, t\u001b[38;5;241m.\u001b[39msecs)\n\u001b[1;32m    167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m megacells\n", | ||||
|       "File \u001b[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:200\u001b[0m, in \u001b[0;36mAutotuner.run_benchmark\u001b[0;34m(simulator, arguments, timesteps, warmup_timesteps)\u001b[0m\n\u001b[1;32m    197\u001b[0m end\u001b[38;5;241m.\u001b[39mrecord(sim\u001b[38;5;241m.\u001b[39mstream)\n\u001b[1;32m    199\u001b[0m \u001b[38;5;66;03m#Synchronize end event\u001b[39;00m\n\u001b[0;32m--> 200\u001b[0m \u001b[43mend\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msynchronize\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    202\u001b[0m \u001b[38;5;66;03m#Compute megacells\u001b[39;00m\n\u001b[1;32m    203\u001b[0m gpu_elapsed \u001b[38;5;241m=\u001b[39m end\u001b[38;5;241m.\u001b[39mtime_since(start)\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m1.0e-3\u001b[39m\n", | ||||
|       "\u001b[0;31mKeyboardInterrupt\u001b[0m: " | ||||
|       "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", | ||||
|       "\u001B[0;31mKeyboardInterrupt\u001B[0m                         Traceback (most recent call last)", | ||||
|       "Cell \u001B[0;32mIn[10], line 5\u001B[0m\n\u001B[1;32m      2\u001B[0m importlib\u001B[38;5;241m.\u001B[39mreload(KP07)\n\u001B[1;32m      4\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m Common\u001B[38;5;241m.\u001B[39mTimer(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mconstruct\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m t:\n\u001B[0;32m----> 5\u001B[0m     sim \u001B[38;5;241m=\u001B[39m \u001B[43mKP07\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mKP07\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43marguments\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m      7\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m Common\u001B[38;5;241m.\u001B[39mTimer(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mstep\u001B[39m\u001B[38;5;124m\"\u001B[39m) \u001B[38;5;28;01mas\u001B[39;00m t:\n\u001B[1;32m      8\u001B[0m     t \u001B[38;5;241m=\u001B[39m sim\u001B[38;5;241m.\u001B[39msimulate(t_end)\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/KP07.py:70\u001B[0m, in \u001B[0;36mKP07.__init__\u001B[0;34m(self, context, h0, hu0, hv0, nx, ny, dx, dy, g, theta, cfl_scale, order, boundary_conditions, block_width, block_height, dt, compile_opts)\u001B[0m\n\u001B[1;32m     53\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m     54\u001B[0m \u001B[38;5;124;03mInitialization routine\u001B[39;00m\n\u001B[1;32m     55\u001B[0m \u001B[38;5;124;03m\u001B[39;00m\n\u001B[0;32m   (...)\u001B[0m\n\u001B[1;32m     66\u001B[0m \u001B[38;5;124;03m    compile_opts: Pass a list of nvcc compiler options\u001B[39;00m\n\u001B[1;32m     67\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[1;32m     69\u001B[0m \u001B[38;5;66;03m# Call super constructor\u001B[39;00m\n\u001B[0;32m---> 70\u001B[0m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[38;5;21;43m__init__\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mcontext\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\n\u001B[1;32m     71\u001B[0m \u001B[43m    \u001B[49m\u001B[43mnx\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mny\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\n\u001B[1;32m     72\u001B[0m \u001B[43m    \u001B[49m\u001B[43mdx\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mdy\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\n\u001B[1;32m     73\u001B[0m \u001B[43m    \u001B[49m\u001B[43mboundary_conditions\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m     74\u001B[0m \u001B[43m    \u001B[49m\u001B[43mcfl_scale\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m     75\u001B[0m \u001B[43m    \u001B[49m\u001B[43morder\u001B[49m\u001B[43m,\u001B[49m\n\u001B[1;32m     76\u001B[0m \u001B[43m    \u001B[49m\u001B[43mblock_width\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mblock_height\u001B[49m\u001B[43m)\u001B[49m;\n\u001B[1;32m     77\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mg \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mfloat32(g)             \n\u001B[1;32m     78\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mtheta \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mfloat32(theta) \n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Simulator.py:146\u001B[0m, in \u001B[0;36mBaseSimulator.__init__\u001B[0;34m(self, context, nx, ny, dx, dy, boundary_conditions, cfl_scale, num_substeps, block_width, block_height)\u001B[0m\n\u001B[1;32m    144\u001B[0m \u001B[38;5;66;03m#Handle autotuning block size\u001B[39;00m\n\u001B[1;32m    145\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mcontext\u001B[38;5;241m.\u001B[39mautotuner:\n\u001B[0;32m--> 146\u001B[0m     peak_configuration \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mcontext\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mautotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_peak_performance\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[38;5;18;43m__class__\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[1;32m    147\u001B[0m     block_width \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mint\u001B[39m(peak_configuration[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m\"\u001B[39m])\n\u001B[1;32m    148\u001B[0m     block_height \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mint\u001B[39m(peak_configuration[\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m\"\u001B[39m])\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:121\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance\u001B[0;34m(self, simulator)\u001B[0m\n\u001B[1;32m    119\u001B[0m     logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCould not get autotuned peak performance for \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m: benchmarking\u001B[39m\u001B[38;5;124m\"\u001B[39m, key)\n\u001B[1;32m    120\u001B[0m     data\u001B[38;5;241m.\u001B[39mclose()\n\u001B[0;32m--> 121\u001B[0m     \u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbenchmark\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    122\u001B[0m     data \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mload(\u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mfilename)\n\u001B[1;32m    124\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfind_max_index\u001B[39m(megacells):\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:84\u001B[0m, in \u001B[0;36mAutotuner.benchmark\u001B[0;34m(self, simulator, force)\u001B[0m\n\u001B[1;32m     81\u001B[0m             benchmark_data[k] \u001B[38;5;241m=\u001B[39m v\n\u001B[1;32m     83\u001B[0m \u001B[38;5;66;03m# Run benchmark\u001B[39;00m\n\u001B[0;32m---> 84\u001B[0m benchmark_data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m_megacells\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[43mAutotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mbenchmark_single_simulator\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43marguments\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mblock_widths\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mblock_heights\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m     85\u001B[0m benchmark_data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m_block_widths\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mblock_widths\n\u001B[1;32m     86\u001B[0m benchmark_data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m_block_heights\u001B[39m\u001B[38;5;124m\"\u001B[39m] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mblock_heights\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:162\u001B[0m, in \u001B[0;36mAutotuner.benchmark_single_simulator\u001B[0;34m(simulator, arguments, block_widths, block_heights)\u001B[0m\n\u001B[1;32m    160\u001B[0m         \u001B[38;5;28;01mfor\u001B[39;00m i, block_width \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(block_widths):\n\u001B[1;32m    161\u001B[0m             sim_arguments\u001B[38;5;241m.\u001B[39mupdate({\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m: block_width})\n\u001B[0;32m--> 162\u001B[0m             megacells[j, i] \u001B[38;5;241m=\u001B[39m \u001B[43mAutotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mrun_benchmark\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43msim_arguments\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    165\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mCompleted \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m in \u001B[39m\u001B[38;5;132;01m%f\u001B[39;00m\u001B[38;5;124m seconds\u001B[39m\u001B[38;5;124m\"\u001B[39m, simulator\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, t\u001B[38;5;241m.\u001B[39msecs)\n\u001B[1;32m    167\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m megacells\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:200\u001B[0m, in \u001B[0;36mAutotuner.run_benchmark\u001B[0;34m(simulator, arguments, timesteps, warmup_timesteps)\u001B[0m\n\u001B[1;32m    197\u001B[0m end\u001B[38;5;241m.\u001B[39mrecord(sim\u001B[38;5;241m.\u001B[39mstream)\n\u001B[1;32m    199\u001B[0m \u001B[38;5;66;03m#Synchronize end event\u001B[39;00m\n\u001B[0;32m--> 200\u001B[0m \u001B[43mend\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43msynchronize\u001B[49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    202\u001B[0m \u001B[38;5;66;03m#Compute megacells\u001B[39;00m\n\u001B[1;32m    203\u001B[0m gpu_elapsed \u001B[38;5;241m=\u001B[39m end\u001B[38;5;241m.\u001B[39mtime_since(start)\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m1.0e-3\u001B[39m\n", | ||||
|       "\u001B[0;31mKeyboardInterrupt\u001B[0m: " | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|  | ||||
| @ -34,8 +34,9 @@ from mpi4py import MPI | ||||
| import pycuda.driver as cuda | ||||
| 
 | ||||
| # Simulator engine etc | ||||
| from GPUSimulators import MPISimulator, Common | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| from GPUSimulators import MPISimulator | ||||
| from GPUSimulators.common import common | ||||
| from GPUSimulators.gpu import cuda_context | ||||
| from GPUSimulators import EE2D_KP07_dimsplit | ||||
| from GPUSimulators.helpers import InitialConditions as IC | ||||
| 
 | ||||
| @ -147,7 +148,7 @@ def genSim(grid, **kwargs): | ||||
|     return sim | ||||
| 
 | ||||
| 
 | ||||
| outfile, sim_runner_profiling_data, sim_profiling_data = Common.runSimulation( | ||||
| outfile, sim_runner_profiling_data, sim_profiling_data = Common.run_simulation( | ||||
|     genSim, arguments, outfile, save_times, save_var_names, dt) | ||||
| 
 | ||||
| if(args.profile): | ||||
| @ -183,8 +184,8 @@ if(args.profile and MPI.COMM_WORLD.rank == 0): | ||||
|     profiling_data["slurm_job_id"] = job_id | ||||
|     profiling_data["n_cuda_devices"] = str(num_cuda_devices) | ||||
|     profiling_data["n_processes"] = str(MPI.COMM_WORLD.size) | ||||
|     profiling_data["git_hash"] = Common.getGitHash() | ||||
|     profiling_data["git_status"] = Common.getGitStatus() | ||||
|     profiling_data["git_hash"] = Common.get_git_hash() | ||||
|     profiling_data["git_status"] = Common.get_git_status() | ||||
| 
 | ||||
|     with open(profiling_file, "w") as write_file: | ||||
|         json.dump(profiling_data, write_file) | ||||
|  | ||||
| @ -25,7 +25,8 @@ import gc | ||||
| import logging | ||||
| 
 | ||||
| #Simulator engine etc | ||||
| from GPUSimulators import SHMEMSimulatorGroup, Common | ||||
| from GPUSimulators import SHMEMSimulatorGroup | ||||
| from GPUSimulators.common import common | ||||
| from GPUSimulators import EE2D_KP07_dimsplit | ||||
| from GPUSimulators.helpers import InitialConditions as IC | ||||
| 
 | ||||
| @ -99,7 +100,7 @@ def genSim(sims, grid, **kwargs): | ||||
|     sim = SHMEMSimulatorGroup.SHMEMSimulatorGroup(sims, grid) | ||||
|     return sim | ||||
| 
 | ||||
| outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names) | ||||
| outfile = Common.run_simulation(genSim, arguments, outfile, save_times, save_var_names) | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -28,8 +28,8 @@ import logging | ||||
| import pycuda.driver as cuda | ||||
| 
 | ||||
| # Simulator engine etc | ||||
| from GPUSimulators import Common | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| from GPUSimulators.common import common | ||||
| from GPUSimulators.gpu import cuda_context | ||||
| from GPUSimulators import EE2D_KP07_dimsplit | ||||
| from GPUSimulators.helpers import InitialConditions as IC | ||||
| 
 | ||||
| @ -104,7 +104,7 @@ def genSim(**kwargs): | ||||
|     return local_sim | ||||
| 
 | ||||
| 
 | ||||
| outfile = Common.runSimulation( | ||||
| outfile = Common.run_simulation( | ||||
|     genSim, arguments, outfile, save_times, save_var_names) | ||||
| 
 | ||||
| #### | ||||
|  | ||||