mirror of
				https://github.com/smyalygames/FiniteVolumeGPU.git
				synced 2025-10-31 20:17:41 +01:00 
			
		
		
		
	refactor(autotuner): move models to a separate package
This commit is contained in:
		
							parent
							
								
									ae9d2b1595
								
							
						
					
					
						commit
						3b424d1f5a
					
				
							
								
								
									
										309
									
								
								Autotuning.ipynb
									
									
									
									
									
								
							
							
						
						
									
										309
									
								
								Autotuning.ipynb
									
									
									
									
									
								
							| @ -45,23 +45,22 @@ | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 5, | ||||
|    "metadata": {}, | ||||
|    "cell_type": "code", | ||||
|    "outputs": [], | ||||
|    "execution_count": null, | ||||
|    "source": [ | ||||
|     "from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, Autotuner\n", | ||||
|     "from GPUSimulators import Autotuner\n", | ||||
|     "from GPUSimulators.model import Force, HLL, HLL2, KP07, LxF, WAF, KP07Dimsplit\n", | ||||
|     "from GPUSimulators.common import Timer" | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 6, | ||||
|    "metadata": {}, | ||||
|    "cell_type": "code", | ||||
|    "outputs": [], | ||||
|    "source": [ | ||||
|     "%setup_logging --out autotuning.log --name=autotuning" | ||||
|    ] | ||||
|    "execution_count": null, | ||||
|    "source": "%setup_logging --out autotuning.log --name=autotuning" | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
| @ -111,28 +110,12 @@ | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": 9, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "ename": "ValueError", | ||||
|      "evalue": "All-NaN slice encountered", | ||||
|      "output_type": "error", | ||||
|      "traceback": [ | ||||
|       "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", | ||||
|       "\u001B[0;31mValueError\u001B[0m                                Traceback (most recent call last)", | ||||
|       "Cell \u001B[0;32mIn[9], line 2\u001B[0m\n\u001B[1;32m      1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [autotuner\u001B[38;5;241m.\u001B[39mget_peak_performance(simulator) \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m      3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m      4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n", | ||||
|       "Cell \u001B[0;32mIn[9], line 2\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m      1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [\u001B[43mautotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_peak_performance\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m      3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m      4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:132\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance\u001B[0;34m(self, simulator)\u001B[0m\n\u001B[1;32m    130\u001B[0m block_widths \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_widths\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[1;32m    131\u001B[0m block_heights \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_heights\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[0;32m--> 132\u001B[0m j, i \u001B[38;5;241m=\u001B[39m \u001B[43mfind_max_index\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    134\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key] \u001B[38;5;241m=\u001B[39m { \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_widths[i],\n\u001B[1;32m    135\u001B[0m                          \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_heights[j],\n\u001B[1;32m    136\u001B[0m                          \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m\"\u001B[39m: megacells[j, i] }\n\u001B[1;32m    137\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mReturning \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m as peak performance parameters\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key])\n", | ||||
|       "File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:126\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance.<locals>.find_max_index\u001B[0;34m(megacells)\u001B[0m\n\u001B[1;32m    125\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfind_max_index\u001B[39m(megacells):\n\u001B[0;32m--> 126\u001B[0m     max_index \u001B[38;5;241m=\u001B[39m \u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnanargmax\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m    127\u001B[0m     \u001B[38;5;28;01mreturn\u001B[39;00m np\u001B[38;5;241m.\u001B[39munravel_index(max_index, megacells\u001B[38;5;241m.\u001B[39mshape)\n", | ||||
|       "File \u001B[0;32m~/.conda/envs/ShallowWaterGPU/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:613\u001B[0m, in \u001B[0;36mnanargmax\u001B[0;34m(a, axis, out, keepdims)\u001B[0m\n\u001B[1;32m    611\u001B[0m     mask \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mall(mask, axis\u001B[38;5;241m=\u001B[39maxis)\n\u001B[1;32m    612\u001B[0m     \u001B[38;5;28;01mif\u001B[39;00m np\u001B[38;5;241m.\u001B[39many(mask):\n\u001B[0;32m--> 613\u001B[0m         \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAll-NaN slice encountered\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m    614\u001B[0m res \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39margmax(a, axis\u001B[38;5;241m=\u001B[39maxis, out\u001B[38;5;241m=\u001B[39mout, keepdims\u001B[38;5;241m=\u001B[39mkeepdims)\n\u001B[1;32m    615\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m res\n", | ||||
|       "\u001B[0;31mValueError\u001B[0m: All-NaN slice encountered" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "cell_type": "code", | ||||
|    "outputs": [], | ||||
|    "execution_count": null, | ||||
|    "source": [ | ||||
|     "simulators = [LxF.LxF, FORCE.FORCE, HLL.HLL, HLL2.HLL2, KP07.KP07, KP07_dimsplit.KP07_dimsplit, WAF.WAF]\n", | ||||
|     "simulators = [LxF, Force, HLL, HLL2, KP07, KP07Dimsplit, WAF]\n", | ||||
|     "peak_performance = [autotuner.get_peak_performance(simulator) for simulator in simulators]\n", | ||||
|     "megacells = [performance['megacells'] for performance in peak_performance]\n", | ||||
|     "xlabels = [f\"{simulators[i].__name__}\\n[{performance['block_width']}x{performance['block_height']}]\" for i, performance\n", | ||||
| @ -146,28 +129,11 @@ | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Registering my_context in user workspace\n", | ||||
|       "PyCUDA version 2017.1.1\n", | ||||
|       "CUDA version (9, 1, 0)\n", | ||||
|       "Driver version 9010\n", | ||||
|       "Using 'GeForce 840M' GPU\n", | ||||
|       "Created context handle <694827722560>\n", | ||||
|       "Using CUDA cache dir c:\\Users\\anbro\\Documents\\projects\\ShallowWaterGPU\\GPUSimulators\\cuda_cache\n", | ||||
|       "Autotuning enabled. It may take several minutes to run the code the first time: have patience\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "%cuda_context_handler my_context" | ||||
|    ] | ||||
|    "cell_type": "code", | ||||
|    "outputs": [], | ||||
|    "execution_count": null, | ||||
|    "source": "%cuda_context_handler my_context" | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
| @ -258,239 +224,14 @@ | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "metadata": {}, | ||||
|    "cell_type": "code", | ||||
|    "outputs": [], | ||||
|    "execution_count": null, | ||||
|    "metadata": { | ||||
|     "scrolled": false | ||||
|    }, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "LxF\n", | ||||
|       "[63x63] => 107.3 (0.000185)\n", | ||||
|       "[127x127] => 165.6 (0.000487)\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "C:\\Users\\anbro\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in sqrt\n" | ||||
|      ] | ||||
|     }, | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "[191x191] => 183.4 (0.000995)\n", | ||||
|       "[255x255] => 180.0 (0.001806)\n", | ||||
|       "[319x319] => 185.8 (0.002738)\n", | ||||
|       "[383x383] => 187.3 (0.003915)\n", | ||||
|       "[447x447] => 189.7 (0.005266)\n", | ||||
|       "[511x511] => 191.8 (0.006806)\n", | ||||
|       "[639x639] => 193.6 (0.010548)\n", | ||||
|       "[767x767] => 193.7 (0.015182)\n", | ||||
|       "[895x895] => 195.6 (0.020481)\n", | ||||
|       "[1023x1023] => 195.0 (0.026839)\n", | ||||
|       "[1151x1151] => 195.8 (0.033822)\n", | ||||
|       "[1279x1279] => 196.1 (0.041711)\n", | ||||
|       "[1407x1407] => 196.2 (0.050439)\n", | ||||
|       "[1535x1535] => 196.4 (0.059986)\n", | ||||
|       "[1663x1663] => 196.6 (0.070330)\n", | ||||
|       "[1791x1791] => 196.7 (0.081546)\n", | ||||
|       "[1919x1919] => 196.9 (0.093511)\n", | ||||
|       "[2047x2047] => 202.9 (0.103257)\n", | ||||
|       "[2303x2303] => 210.7 (0.125838)\n", | ||||
|       "[2559x2559] => 208.0 (0.157417)\n", | ||||
|       "[2815x2815] => 211.6 (0.187229)\n", | ||||
|       "[3071x3071] => 208.7 (0.225954)\n", | ||||
|       "[3327x3327] => 214.2 (0.258395)\n", | ||||
|       "[3583x3583] => 214.2 (0.299629)\n", | ||||
|       "[3839x3839] => 214.2 (0.343982)\n", | ||||
|       "[4095x4095] => 214.9 (0.390088)\n", | ||||
|       "FORCE\n", | ||||
|       "[63x63] => 94.3 (0.000210)\n", | ||||
|       "[127x127] => 136.5 (0.000591)\n", | ||||
|       "[191x191] => 147.0 (0.001241)\n", | ||||
|       "[255x255] => 148.5 (0.002189)\n", | ||||
|       "[319x319] => 151.6 (0.003357)\n", | ||||
|       "[383x383] => 153.0 (0.004793)\n", | ||||
|       "[447x447] => 153.9 (0.006494)\n", | ||||
|       "[511x511] => 155.0 (0.008421)\n", | ||||
|       "[639x639] => 156.4 (0.013056)\n", | ||||
|       "[767x767] => 156.5 (0.018790)\n", | ||||
|       "[895x895] => 157.0 (0.025514)\n", | ||||
|       "[1023x1023] => 143.6 (0.036450)\n", | ||||
|       "[1151x1151] => 143.6 (0.046115)\n", | ||||
|       "[1279x1279] => 143.8 (0.056865)\n", | ||||
|       "[1407x1407] => 143.9 (0.068797)\n", | ||||
|       "[1535x1535] => 144.0 (0.081832)\n", | ||||
|       "[1663x1663] => 144.0 (0.096007)\n", | ||||
|       "[1791x1791] => 144.0 (0.111343)\n", | ||||
|       "[1919x1919] => 144.2 (0.127712)\n", | ||||
|       "[2047x2047] => 151.7 (0.138153)\n", | ||||
|       "[2303x2303] => 147.3 (0.180021)\n", | ||||
|       "[2559x2559] => 154.3 (0.212248)\n", | ||||
|       "[2815x2815] => 158.3 (0.250279)\n", | ||||
|       "[3071x3071] => 156.9 (0.300547)\n", | ||||
|       "[3327x3327] => 158.4 (0.349353)\n", | ||||
|       "[3583x3583] => 158.4 (0.405175)\n", | ||||
|       "[3839x3839] => 158.4 (0.465201)\n", | ||||
|       "[4095x4095] => 158.4 (0.529337)\n", | ||||
|       "HLL\n", | ||||
|       "[63x63] => 65.7 (0.000302)\n", | ||||
|       "[127x127] => 98.6 (0.000818)\n", | ||||
|       "[191x191] => 108.1 (0.001688)\n", | ||||
|       "[255x255] => 109.2 (0.002977)\n", | ||||
|       "[319x319] => 111.9 (0.004546)\n", | ||||
|       "[383x383] => 113.2 (0.006482)\n", | ||||
|       "[447x447] => 113.7 (0.008785)\n", | ||||
|       "[511x511] => 114.4 (0.011411)\n", | ||||
|       "[639x639] => 115.3 (0.017713)\n", | ||||
|       "[767x767] => 115.6 (0.025454)\n", | ||||
|       "[895x895] => 105.7 (0.037888)\n", | ||||
|       "[1023x1023] => 105.8 (0.049473)\n", | ||||
|       "[1151x1151] => 105.9 (0.062558)\n", | ||||
|       "[1279x1279] => 106.0 (0.077148)\n", | ||||
|       "[1407x1407] => 106.1 (0.093290)\n", | ||||
|       "[1535x1535] => 109.8 (0.107271)\n", | ||||
|       "[1663x1663] => 106.2 (0.130195)\n", | ||||
|       "[1791x1791] => 107.7 (0.148973)\n", | ||||
|       "[1919x1919] => 115.0 (0.160104)\n", | ||||
|       "[2047x2047] => 113.3 (0.184913)\n", | ||||
|       "[2303x2303] => 111.9 (0.236908)\n", | ||||
|       "[2559x2559] => 116.6 (0.280840)\n", | ||||
|       "[2815x2815] => 116.6 (0.339777)\n", | ||||
|       "[3071x3071] => 116.6 (0.404268)\n", | ||||
|       "[3327x3327] => 116.6 (0.474572)\n", | ||||
|       "[3583x3583] => 116.7 (0.550240)\n", | ||||
|       "[3839x3839] => 116.7 (0.631563)\n", | ||||
|       "[4095x4095] => 116.7 (0.718161)\n", | ||||
|       "HLL2\n", | ||||
|       "[63x63] => 44.2 (0.000449)\n", | ||||
|       "[127x127] => 63.0 (0.001280)\n", | ||||
|       "[191x191] => 68.4 (0.002666)\n", | ||||
|       "[255x255] => 69.2 (0.004698)\n", | ||||
|       "[319x319] => 70.6 (0.007204)\n", | ||||
|       "[383x383] => 71.1 (0.010314)\n", | ||||
|       "[447x447] => 71.6 (0.013956)\n", | ||||
|       "[511x511] => 72.0 (0.018146)\n", | ||||
|       "[639x639] => 72.4 (0.028204)\n", | ||||
|       "[767x767] => 72.5 (0.040545)\n", | ||||
|       "[895x895] => 72.8 (0.055047)\n", | ||||
|       "[1023x1023] => 72.8 (0.071828)\n", | ||||
|       "[1151x1151] => 66.5 (0.099652)\n", | ||||
|       "[1279x1279] => 69.8 (0.117195)\n", | ||||
|       "[1407x1407] => 67.0 (0.147833)\n", | ||||
|       "[1535x1535] => 71.3 (0.165185)\n", | ||||
|       "[1663x1663] => 71.2 (0.194123)\n", | ||||
|       "[1791x1791] => 72.1 (0.222351)\n", | ||||
|       "[1919x1919] => 70.3 (0.261847)\n", | ||||
|       "[2047x2047] => 73.2 (0.286228)\n", | ||||
|       "[2303x2303] => 72.0 (0.368479)\n", | ||||
|       "[2559x2559] => 73.2 (0.447096)\n", | ||||
|       "[2815x2815] => 73.2 (0.541084)\n", | ||||
|       "[3071x3071] => 73.2 (0.643925)\n", | ||||
|       "[3327x3327] => 73.2 (0.755588)\n", | ||||
|       "[3583x3583] => 73.3 (0.876222)\n", | ||||
|       "[3839x3839] => 73.3 (1.005958)\n", | ||||
|       "[4095x4095] => 73.3 (1.144158)\n", | ||||
|       "KP07\n", | ||||
|       "[63x63] => 69.9 (0.000284)\n", | ||||
|       "[127x127] => 95.0 (0.000849)\n", | ||||
|       "[191x191] => 101.7 (0.001794)\n", | ||||
|       "[255x255] => 101.3 (0.003209)\n", | ||||
|       "[319x319] => 106.9 (0.004760)\n", | ||||
|       "[383x383] => 107.1 (0.006850)\n", | ||||
|       "[447x447] => 109.2 (0.009150)\n", | ||||
|       "[511x511] => 108.0 (0.012088)\n", | ||||
|       "[639x639] => 111.6 (0.018295)\n", | ||||
|       "[767x767] => 111.6 (0.026361)\n", | ||||
|       "[895x895] => 102.4 (0.039123)\n", | ||||
|       "[1023x1023] => 102.2 (0.051186)\n", | ||||
|       "[1151x1151] => 102.3 (0.064764)\n", | ||||
|       "[1279x1279] => 103.4 (0.079074)\n", | ||||
|       "[1407x1407] => 103.2 (0.095876)\n", | ||||
|       "[1535x1535] => 106.3 (0.110860)\n", | ||||
|       "[1663x1663] => 103.1 (0.134182)\n", | ||||
|       "[1791x1791] => 107.7 (0.148853)\n", | ||||
|       "[1919x1919] => 105.5 (0.174575)\n", | ||||
|       "[2047x2047] => 111.4 (0.188084)\n", | ||||
|       "[2303x2303] => 113.5 (0.233650)\n", | ||||
|       "[2559x2559] => 114.0 (0.287327)\n", | ||||
|       "[2815x2815] => 113.7 (0.348536)\n", | ||||
|       "[3071x3071] => 113.2 (0.416533)\n", | ||||
|       "[3327x3327] => 113.7 (0.486893)\n", | ||||
|       "[3583x3583] => 113.5 (0.565573)\n", | ||||
|       "[3839x3839] => 113.5 (0.649058)\n", | ||||
|       "[4095x4095] => 113.6 (0.738275)\n", | ||||
|       "KP07_dimsplit\n", | ||||
|       "[63x63] => 49.9 (0.000397)\n", | ||||
|       "[127x127] => 71.7 (0.001125)\n", | ||||
|       "[191x191] => 76.8 (0.002374)\n", | ||||
|       "[255x255] => 77.5 (0.004197)\n", | ||||
|       "[319x319] => 79.0 (0.006437)\n", | ||||
|       "[383x383] => 79.8 (0.009189)\n", | ||||
|       "[447x447] => 80.3 (0.012449)\n", | ||||
|       "[511x511] => 80.6 (0.016191)\n", | ||||
|       "[639x639] => 81.1 (0.025171)\n", | ||||
|       "[767x767] => 81.3 (0.036181)\n", | ||||
|       "[895x895] => 74.3 (0.053902)\n", | ||||
|       "[1023x1023] => 74.4 (0.070335)\n", | ||||
|       "[1151x1151] => 76.2 (0.086896)\n", | ||||
|       "[1279x1279] => 74.5 (0.109725)\n", | ||||
|       "[1407x1407] => 74.6 (0.132712)\n", | ||||
|       "[1535x1535] => 79.4 (0.148342)\n", | ||||
|       "[1663x1663] => 78.3 (0.176547)\n", | ||||
|       "[1791x1791] => 81.3 (0.197279)\n", | ||||
|       "[1919x1919] => 78.5 (0.234550)\n", | ||||
|       "[2047x2047] => 82.0 (0.255396)\n", | ||||
|       "[2303x2303] => 81.0 (0.327297)\n", | ||||
|       "[2559x2559] => 82.0 (0.399197)\n", | ||||
|       "[2815x2815] => 82.0 (0.483034)\n", | ||||
|       "[3071x3071] => 82.0 (0.574737)\n", | ||||
|       "[3327x3327] => 82.1 (0.674395)\n", | ||||
|       "[3583x3583] => 82.1 (0.782180)\n", | ||||
|       "[3839x3839] => 82.1 (0.897551)\n", | ||||
|       "[4095x4095] => 82.1 (1.020911)\n", | ||||
|       "WAF\n", | ||||
|       "[63x63] => 32.8 (0.000605)\n", | ||||
|       "[127x127] => 45.6 (0.001768)\n", | ||||
|       "[191x191] => 53.9 (0.003381)\n", | ||||
|       "[255x255] => 54.3 (0.005985)\n", | ||||
|       "[319x319] => 57.7 (0.008821)\n", | ||||
|       "[383x383] => 56.9 (0.012893)\n", | ||||
|       "[447x447] => 59.3 (0.016840)\n", | ||||
|       "[511x511] => 58.8 (0.022214)\n", | ||||
|       "[639x639] => 59.6 (0.034278)\n", | ||||
|       "[767x767] => 60.1 (0.048942)\n", | ||||
|       "[895x895] => 55.3 (0.072483)\n", | ||||
|       "[1023x1023] => 55.4 (0.094402)\n", | ||||
|       "[1151x1151] => 55.7 (0.119006)\n", | ||||
|       "[1279x1279] => 55.0 (0.148746)\n", | ||||
|       "[1407x1407] => 55.8 (0.177399)\n", | ||||
|       "[1535x1535] => 58.7 (0.200663)\n", | ||||
|       "[1663x1663] => 57.8 (0.239299)\n", | ||||
|       "[1791x1791] => 59.6 (0.269144)\n", | ||||
|       "[1919x1919] => 61.1 (0.301218)\n", | ||||
|       "[2047x2047] => 61.2 (0.342070)\n", | ||||
|       "[2303x2303] => 61.3 (0.432280)\n", | ||||
|       "[2559x2559] => 61.0 (0.537125)\n", | ||||
|       "[2815x2815] => 61.1 (0.648336)\n", | ||||
|       "[3071x3071] => 61.3 (0.769734)\n", | ||||
|       "[3327x3327] => 61.4 (0.901199)\n", | ||||
|       "[3583x3583] => 61.1 (1.049726)\n", | ||||
|       "[3839x3839] => 61.3 (1.202961)\n", | ||||
|       "[4095x4095] => 61.4 (1.366446)\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "source": [ | ||||
|     "run_simulation = True\n", | ||||
|     "sizes = list(range(64, 512, 64)) + list(range(512, 2048, 128)) + list(range(2048, 4096, 256)) + [4096]\n", | ||||
|     "simulators = [LxF.LxF, FORCE.FORCE, HLL.HLL, HLL2.HLL2, KP07.KP07, KP07_dimsplit.KP07_dimsplit, WAF.WAF]\n", | ||||
|     "simulators = [LxF, Force, HLL, HLL2, KP07, KP07Dimsplit, WAF]\n", | ||||
|     "if run_simulation:\n", | ||||
|     "    megacells = {}\n", | ||||
|     "    for simulator in simulators:\n", | ||||
| @ -542,18 +283,10 @@ | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "cell_type": "code", | ||||
|    "execution_count": null, | ||||
|    "metadata": {}, | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stdout", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "Loading from file\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "cell_type": "code", | ||||
|    "outputs": [], | ||||
|    "execution_count": null, | ||||
|    "source": [ | ||||
|     "datafilename = \"megacells.npz\"\n", | ||||
|     "if not os.path.isfile(datafilename) and \"megacells\" in globals():\n", | ||||
|  | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -31,7 +31,7 @@ | ||||
|     "    from io import StringIO\n", | ||||
|     "\n", | ||||
|     "from GPUSimulators.common import Timer, DataDumper, ProgressPrinter\n", | ||||
|     "from GPUSimulators.EE2D_KP07_dimsplit import EE2D_KP07_dimsplit\n", | ||||
|     "from GPUSimulators.model import EE2DKP07Dimsplit\n", | ||||
|     "from GPUSimulators.helpers import InitialConditions, Visualization" | ||||
|    ] | ||||
|   }, | ||||
| @ -96,7 +96,7 @@ | ||||
|    "source": [ | ||||
|     "def run_simulation(outfile, t_end, sim_args):\n", | ||||
|     "    with Timer(\"construct\") as t:\n", | ||||
|     "        sim = EE2D_KP07_dimsplit(**sim_args)\n", | ||||
|     "        sim = EE2DKP07Dimsplit(**sim_args)\n", | ||||
|     "    print(\"Constructed in \" + str(t.secs) + \" seconds\")\n", | ||||
|     "\n", | ||||
|     "    #Create a netcdf file and simulate\n", | ||||
|  | ||||
							
								
								
									
										8
									
								
								GPUSimulators/model/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								GPUSimulators/model/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,8 @@ | ||||
| from .ee2d_kp07_dimsplit import EE2DKP07Dimsplit | ||||
| from .force import Force | ||||
| from .hll import HLL | ||||
| from .hll2 import HLL2 | ||||
| from .kp07 import KP07 | ||||
| from .kp07_dimsplit import KP07Dimsplit | ||||
| from .lxf import LxF | ||||
| from .waf import WAF | ||||
| @ -27,7 +27,7 @@ from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition | ||||
| 
 | ||||
| 
 | ||||
| class EE2D_KP07_dimsplit(BaseSimulator): | ||||
| class EE2DKP07Dimsplit(BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the Forward-Backward linear scheme | ||||
|     """ | ||||
| @ -29,7 +29,7 @@ from GPUSimulators import Simulator | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| 
 | ||||
| class FORCE(Simulator.BaseSimulator): | ||||
| class Force(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations  | ||||
|     """ | ||||
| @ -33,7 +33,7 @@ from GPUSimulators.common import ArakawaA2D | ||||
| from GPUSimulators.Simulator import BoundaryCondition | ||||
| 
 | ||||
| 
 | ||||
| class KP07_dimsplit(Simulator.BaseSimulator): | ||||
| class KP07Dimsplit(Simulator.BaseSimulator): | ||||
|     """ | ||||
|     Class that solves the SW equations using the dimensionally split KP07 scheme | ||||
|     """ | ||||
| @ -169,16 +169,14 @@ | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "metadata": { | ||||
|     "ExecuteTime": { | ||||
|      "end_time": "2025-06-24T16:05:59.437729Z", | ||||
|      "start_time": "2025-06-24T16:05:59.432669Z" | ||||
|     } | ||||
|    }, | ||||
|    "metadata": {}, | ||||
|    "cell_type": "code", | ||||
|    "outputs": [], | ||||
|    "execution_count": null, | ||||
|    "source": [ | ||||
|     "%%px\n", | ||||
|     "\n", | ||||
|     "from GPUSimulators.model import EE2DKP07Dimsplit\n", | ||||
|     "from GPUSimulators.helpers import InitialConditions\n", | ||||
|     "\n", | ||||
|     "my_context.autotuner = None\n", | ||||
| @ -201,27 +199,17 @@ | ||||
|     "arguments['theta'] = 1.2\n", | ||||
|     "arguments['grid'] = grid\n", | ||||
|     "\n", | ||||
|     "from GPUSimulators import EE2D_KP07_dimsplit\n", | ||||
|     "from GPUSimulators.model import ee2d_kp07_dimsplit, hll2\n", | ||||
|     "\n", | ||||
|     "\n", | ||||
|     "def gen_sim(grid, **kwargs):\n", | ||||
|     "    local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)\n", | ||||
|     "    local_sim = EE2DKP07Dimsplit(**kwargs)\n", | ||||
|     "    sim = MPISimulator.MPISimulator(local_sim, grid)\n", | ||||
|     "    return sim\n", | ||||
|     "\n", | ||||
|     "\n", | ||||
|     "outfile = run_simulation(gen_sim, arguments, outfile, save_times, save_var_names)" | ||||
|    ], | ||||
|    "outputs": [ | ||||
|     { | ||||
|      "name": "stderr", | ||||
|      "output_type": "stream", | ||||
|      "text": [ | ||||
|       "UsageError: Cell magic `%%px` not found.\n" | ||||
|      ] | ||||
|     } | ||||
|    ], | ||||
|    "execution_count": 1 | ||||
|    ] | ||||
|   }, | ||||
|   { | ||||
|    "metadata": {}, | ||||
| @ -271,6 +259,7 @@ | ||||
|    "source": [ | ||||
|     "%%px\n", | ||||
|     "\n", | ||||
|     "from GPUSimulators.model import HLL2\n", | ||||
|     "from GPUSimulators.helpers import InitialConditions\n", | ||||
|     "from GPUSimulators.Simulator import BoundaryCondition\n", | ||||
|     "\n", | ||||
| @ -307,11 +296,9 @@ | ||||
|     "    'grid': grid\n", | ||||
|     "}\n", | ||||
|     "\n", | ||||
|     "from GPUSimulators import HLL2\n", | ||||
|     "\n", | ||||
|     "\n", | ||||
|     "def gen_sim(grid, **kwargs):\n", | ||||
|     "    local_sim = HLL2.HLL2(**kwargs)\n", | ||||
|     "    local_sim = HLL2(**kwargs)\n", | ||||
|     "    sim = MPISimulator.MPISimulator(local_sim, grid)\n", | ||||
|     "    return sim\n", | ||||
|     "\n", | ||||
|  | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -36,7 +36,7 @@ import pycuda.driver as cuda | ||||
| from GPUSimulators import MPISimulator | ||||
| from GPUSimulators.common import run_simulation, get_git_hash, get_git_status | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| from GPUSimulators import EE2D_KP07_dimsplit | ||||
| from GPUSimulators.model import EE2DKP07Dimsplit | ||||
| from GPUSimulators.helpers import InitialConditions as IC | ||||
| 
 | ||||
| import argparse | ||||
| @ -137,7 +137,7 @@ logger.info("Running simulation") | ||||
| 
 | ||||
| 
 | ||||
| def genSim(grid, **kwargs): | ||||
|     local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs) | ||||
|     local_sim = EE2DKP07Dimsplit(**kwargs) | ||||
|     sim = MPISimulator.MPISimulator(local_sim, grid) | ||||
|     return sim | ||||
| 
 | ||||
|  | ||||
| @ -25,8 +25,8 @@ import logging | ||||
| 
 | ||||
| # Simulator engine etc | ||||
| from GPUSimulators import SHMEMSimulatorGroup | ||||
| from GPUSimulators.common import common, run_simulation | ||||
| from GPUSimulators import EE2D_KP07_dimsplit | ||||
| from GPUSimulators.common import run_simulation | ||||
| from GPUSimulators.model import EE2DKP07Dimsplit | ||||
| from GPUSimulators.helpers import InitialConditions as IC | ||||
| 
 | ||||
| #### | ||||
| @ -83,7 +83,7 @@ for i in range(grid.ngpus): | ||||
|     arguments['context'] = grid.cuda_contexts[i] | ||||
|     arguments['theta'] = 1.2 | ||||
| 
 | ||||
|     sims.append(EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**arguments)) | ||||
|     sims.append(EE2DKP07Dimsplit(**arguments)) | ||||
|     # sims[i] = SHMEMSimulator(i, local_sim, grid) # 1st attempt: no wrapper (per sim) | ||||
| 
 | ||||
| arguments['sims'] = sims | ||||
|  | ||||
| @ -29,7 +29,7 @@ import pycuda.driver as cuda | ||||
| # Simulator engine etc | ||||
| from GPUSimulators.common import run_simulation | ||||
| from GPUSimulators.gpu import CudaContext | ||||
| from GPUSimulators import EE2D_KP07_dimsplit | ||||
| from GPUSimulators.model import EE2DKP07Dimsplit | ||||
| from GPUSimulators.helpers import InitialConditions as IC | ||||
| 
 | ||||
| import argparse | ||||
| @ -96,7 +96,7 @@ logger.info("Running simulation") | ||||
| 
 | ||||
| 
 | ||||
| def gen_sim(**kwargs): | ||||
|     local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs) | ||||
|     local_sim = EE2DKP07Dimsplit(**kwargs) | ||||
|     return local_sim | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Anthony Berg
						Anthony Berg