mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2026-01-13 23:28:41 +01:00
refactor(autotuner): move models to a separate package
This commit is contained in:
309
Autotuning.ipynb
309
Autotuning.ipynb
@@ -45,23 +45,22 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, Autotuner\n",
|
||||
"from GPUSimulators import Autotuner\n",
|
||||
"from GPUSimulators.model import Force, HLL, HLL2, KP07, LxF, WAF, KP07Dimsplit\n",
|
||||
"from GPUSimulators.common import Timer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%setup_logging --out autotuning.log --name=autotuning"
|
||||
]
|
||||
"execution_count": null,
|
||||
"source": "%setup_logging --out autotuning.log --name=autotuning"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -111,28 +110,12 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "All-NaN slice encountered",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)",
|
||||
"Cell \u001B[0;32mIn[9], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [autotuner\u001B[38;5;241m.\u001B[39mget_peak_performance(simulator) \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m 3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m 4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n",
|
||||
"Cell \u001B[0;32mIn[9], line 2\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m 1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [\u001B[43mautotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_peak_performance\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m 3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m 4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:132\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance\u001B[0;34m(self, simulator)\u001B[0m\n\u001B[1;32m 130\u001B[0m block_widths \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_widths\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[1;32m 131\u001B[0m block_heights \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_heights\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[0;32m--> 132\u001B[0m j, i \u001B[38;5;241m=\u001B[39m \u001B[43mfind_max_index\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 134\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key] \u001B[38;5;241m=\u001B[39m { \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_widths[i],\n\u001B[1;32m 135\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_heights[j],\n\u001B[1;32m 136\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m\"\u001B[39m: megacells[j, i] }\n\u001B[1;32m 137\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mReturning \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m as peak performance parameters\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key])\n",
|
||||
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:126\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance.<locals>.find_max_index\u001B[0;34m(megacells)\u001B[0m\n\u001B[1;32m 125\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfind_max_index\u001B[39m(megacells):\n\u001B[0;32m--> 126\u001B[0m max_index \u001B[38;5;241m=\u001B[39m \u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnanargmax\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 127\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m np\u001B[38;5;241m.\u001B[39munravel_index(max_index, megacells\u001B[38;5;241m.\u001B[39mshape)\n",
|
||||
"File \u001B[0;32m~/.conda/envs/ShallowWaterGPU/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:613\u001B[0m, in \u001B[0;36mnanargmax\u001B[0;34m(a, axis, out, keepdims)\u001B[0m\n\u001B[1;32m 611\u001B[0m mask \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mall(mask, axis\u001B[38;5;241m=\u001B[39maxis)\n\u001B[1;32m 612\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m np\u001B[38;5;241m.\u001B[39many(mask):\n\u001B[0;32m--> 613\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAll-NaN slice encountered\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 614\u001B[0m res \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39margmax(a, axis\u001B[38;5;241m=\u001B[39maxis, out\u001B[38;5;241m=\u001B[39mout, keepdims\u001B[38;5;241m=\u001B[39mkeepdims)\n\u001B[1;32m 615\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m res\n",
|
||||
"\u001B[0;31mValueError\u001B[0m: All-NaN slice encountered"
|
||||
]
|
||||
}
|
||||
],
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"simulators = [LxF.LxF, FORCE.FORCE, HLL.HLL, HLL2.HLL2, KP07.KP07, KP07_dimsplit.KP07_dimsplit, WAF.WAF]\n",
|
||||
"simulators = [LxF, Force, HLL, HLL2, KP07, KP07Dimsplit, WAF]\n",
|
||||
"peak_performance = [autotuner.get_peak_performance(simulator) for simulator in simulators]\n",
|
||||
"megacells = [performance['megacells'] for performance in peak_performance]\n",
|
||||
"xlabels = [f\"{simulators[i].__name__}\\n[{performance['block_width']}x{performance['block_height']}]\" for i, performance\n",
|
||||
@@ -146,28 +129,11 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Registering my_context in user workspace\n",
|
||||
"PyCUDA version 2017.1.1\n",
|
||||
"CUDA version (9, 1, 0)\n",
|
||||
"Driver version 9010\n",
|
||||
"Using 'GeForce 840M' GPU\n",
|
||||
"Created context handle <694827722560>\n",
|
||||
"Using CUDA cache dir c:\\Users\\anbro\\Documents\\projects\\ShallowWaterGPU\\GPUSimulators\\cuda_cache\n",
|
||||
"Autotuning enabled. It may take several minutes to run the code the first time: have patience\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%cuda_context_handler my_context"
|
||||
]
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": "%cuda_context_handler my_context"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
@@ -258,239 +224,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"LxF\n",
|
||||
"[63x63] => 107.3 (0.000185)\n",
|
||||
"[127x127] => 165.6 (0.000487)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\anbro\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in sqrt\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[191x191] => 183.4 (0.000995)\n",
|
||||
"[255x255] => 180.0 (0.001806)\n",
|
||||
"[319x319] => 185.8 (0.002738)\n",
|
||||
"[383x383] => 187.3 (0.003915)\n",
|
||||
"[447x447] => 189.7 (0.005266)\n",
|
||||
"[511x511] => 191.8 (0.006806)\n",
|
||||
"[639x639] => 193.6 (0.010548)\n",
|
||||
"[767x767] => 193.7 (0.015182)\n",
|
||||
"[895x895] => 195.6 (0.020481)\n",
|
||||
"[1023x1023] => 195.0 (0.026839)\n",
|
||||
"[1151x1151] => 195.8 (0.033822)\n",
|
||||
"[1279x1279] => 196.1 (0.041711)\n",
|
||||
"[1407x1407] => 196.2 (0.050439)\n",
|
||||
"[1535x1535] => 196.4 (0.059986)\n",
|
||||
"[1663x1663] => 196.6 (0.070330)\n",
|
||||
"[1791x1791] => 196.7 (0.081546)\n",
|
||||
"[1919x1919] => 196.9 (0.093511)\n",
|
||||
"[2047x2047] => 202.9 (0.103257)\n",
|
||||
"[2303x2303] => 210.7 (0.125838)\n",
|
||||
"[2559x2559] => 208.0 (0.157417)\n",
|
||||
"[2815x2815] => 211.6 (0.187229)\n",
|
||||
"[3071x3071] => 208.7 (0.225954)\n",
|
||||
"[3327x3327] => 214.2 (0.258395)\n",
|
||||
"[3583x3583] => 214.2 (0.299629)\n",
|
||||
"[3839x3839] => 214.2 (0.343982)\n",
|
||||
"[4095x4095] => 214.9 (0.390088)\n",
|
||||
"FORCE\n",
|
||||
"[63x63] => 94.3 (0.000210)\n",
|
||||
"[127x127] => 136.5 (0.000591)\n",
|
||||
"[191x191] => 147.0 (0.001241)\n",
|
||||
"[255x255] => 148.5 (0.002189)\n",
|
||||
"[319x319] => 151.6 (0.003357)\n",
|
||||
"[383x383] => 153.0 (0.004793)\n",
|
||||
"[447x447] => 153.9 (0.006494)\n",
|
||||
"[511x511] => 155.0 (0.008421)\n",
|
||||
"[639x639] => 156.4 (0.013056)\n",
|
||||
"[767x767] => 156.5 (0.018790)\n",
|
||||
"[895x895] => 157.0 (0.025514)\n",
|
||||
"[1023x1023] => 143.6 (0.036450)\n",
|
||||
"[1151x1151] => 143.6 (0.046115)\n",
|
||||
"[1279x1279] => 143.8 (0.056865)\n",
|
||||
"[1407x1407] => 143.9 (0.068797)\n",
|
||||
"[1535x1535] => 144.0 (0.081832)\n",
|
||||
"[1663x1663] => 144.0 (0.096007)\n",
|
||||
"[1791x1791] => 144.0 (0.111343)\n",
|
||||
"[1919x1919] => 144.2 (0.127712)\n",
|
||||
"[2047x2047] => 151.7 (0.138153)\n",
|
||||
"[2303x2303] => 147.3 (0.180021)\n",
|
||||
"[2559x2559] => 154.3 (0.212248)\n",
|
||||
"[2815x2815] => 158.3 (0.250279)\n",
|
||||
"[3071x3071] => 156.9 (0.300547)\n",
|
||||
"[3327x3327] => 158.4 (0.349353)\n",
|
||||
"[3583x3583] => 158.4 (0.405175)\n",
|
||||
"[3839x3839] => 158.4 (0.465201)\n",
|
||||
"[4095x4095] => 158.4 (0.529337)\n",
|
||||
"HLL\n",
|
||||
"[63x63] => 65.7 (0.000302)\n",
|
||||
"[127x127] => 98.6 (0.000818)\n",
|
||||
"[191x191] => 108.1 (0.001688)\n",
|
||||
"[255x255] => 109.2 (0.002977)\n",
|
||||
"[319x319] => 111.9 (0.004546)\n",
|
||||
"[383x383] => 113.2 (0.006482)\n",
|
||||
"[447x447] => 113.7 (0.008785)\n",
|
||||
"[511x511] => 114.4 (0.011411)\n",
|
||||
"[639x639] => 115.3 (0.017713)\n",
|
||||
"[767x767] => 115.6 (0.025454)\n",
|
||||
"[895x895] => 105.7 (0.037888)\n",
|
||||
"[1023x1023] => 105.8 (0.049473)\n",
|
||||
"[1151x1151] => 105.9 (0.062558)\n",
|
||||
"[1279x1279] => 106.0 (0.077148)\n",
|
||||
"[1407x1407] => 106.1 (0.093290)\n",
|
||||
"[1535x1535] => 109.8 (0.107271)\n",
|
||||
"[1663x1663] => 106.2 (0.130195)\n",
|
||||
"[1791x1791] => 107.7 (0.148973)\n",
|
||||
"[1919x1919] => 115.0 (0.160104)\n",
|
||||
"[2047x2047] => 113.3 (0.184913)\n",
|
||||
"[2303x2303] => 111.9 (0.236908)\n",
|
||||
"[2559x2559] => 116.6 (0.280840)\n",
|
||||
"[2815x2815] => 116.6 (0.339777)\n",
|
||||
"[3071x3071] => 116.6 (0.404268)\n",
|
||||
"[3327x3327] => 116.6 (0.474572)\n",
|
||||
"[3583x3583] => 116.7 (0.550240)\n",
|
||||
"[3839x3839] => 116.7 (0.631563)\n",
|
||||
"[4095x4095] => 116.7 (0.718161)\n",
|
||||
"HLL2\n",
|
||||
"[63x63] => 44.2 (0.000449)\n",
|
||||
"[127x127] => 63.0 (0.001280)\n",
|
||||
"[191x191] => 68.4 (0.002666)\n",
|
||||
"[255x255] => 69.2 (0.004698)\n",
|
||||
"[319x319] => 70.6 (0.007204)\n",
|
||||
"[383x383] => 71.1 (0.010314)\n",
|
||||
"[447x447] => 71.6 (0.013956)\n",
|
||||
"[511x511] => 72.0 (0.018146)\n",
|
||||
"[639x639] => 72.4 (0.028204)\n",
|
||||
"[767x767] => 72.5 (0.040545)\n",
|
||||
"[895x895] => 72.8 (0.055047)\n",
|
||||
"[1023x1023] => 72.8 (0.071828)\n",
|
||||
"[1151x1151] => 66.5 (0.099652)\n",
|
||||
"[1279x1279] => 69.8 (0.117195)\n",
|
||||
"[1407x1407] => 67.0 (0.147833)\n",
|
||||
"[1535x1535] => 71.3 (0.165185)\n",
|
||||
"[1663x1663] => 71.2 (0.194123)\n",
|
||||
"[1791x1791] => 72.1 (0.222351)\n",
|
||||
"[1919x1919] => 70.3 (0.261847)\n",
|
||||
"[2047x2047] => 73.2 (0.286228)\n",
|
||||
"[2303x2303] => 72.0 (0.368479)\n",
|
||||
"[2559x2559] => 73.2 (0.447096)\n",
|
||||
"[2815x2815] => 73.2 (0.541084)\n",
|
||||
"[3071x3071] => 73.2 (0.643925)\n",
|
||||
"[3327x3327] => 73.2 (0.755588)\n",
|
||||
"[3583x3583] => 73.3 (0.876222)\n",
|
||||
"[3839x3839] => 73.3 (1.005958)\n",
|
||||
"[4095x4095] => 73.3 (1.144158)\n",
|
||||
"KP07\n",
|
||||
"[63x63] => 69.9 (0.000284)\n",
|
||||
"[127x127] => 95.0 (0.000849)\n",
|
||||
"[191x191] => 101.7 (0.001794)\n",
|
||||
"[255x255] => 101.3 (0.003209)\n",
|
||||
"[319x319] => 106.9 (0.004760)\n",
|
||||
"[383x383] => 107.1 (0.006850)\n",
|
||||
"[447x447] => 109.2 (0.009150)\n",
|
||||
"[511x511] => 108.0 (0.012088)\n",
|
||||
"[639x639] => 111.6 (0.018295)\n",
|
||||
"[767x767] => 111.6 (0.026361)\n",
|
||||
"[895x895] => 102.4 (0.039123)\n",
|
||||
"[1023x1023] => 102.2 (0.051186)\n",
|
||||
"[1151x1151] => 102.3 (0.064764)\n",
|
||||
"[1279x1279] => 103.4 (0.079074)\n",
|
||||
"[1407x1407] => 103.2 (0.095876)\n",
|
||||
"[1535x1535] => 106.3 (0.110860)\n",
|
||||
"[1663x1663] => 103.1 (0.134182)\n",
|
||||
"[1791x1791] => 107.7 (0.148853)\n",
|
||||
"[1919x1919] => 105.5 (0.174575)\n",
|
||||
"[2047x2047] => 111.4 (0.188084)\n",
|
||||
"[2303x2303] => 113.5 (0.233650)\n",
|
||||
"[2559x2559] => 114.0 (0.287327)\n",
|
||||
"[2815x2815] => 113.7 (0.348536)\n",
|
||||
"[3071x3071] => 113.2 (0.416533)\n",
|
||||
"[3327x3327] => 113.7 (0.486893)\n",
|
||||
"[3583x3583] => 113.5 (0.565573)\n",
|
||||
"[3839x3839] => 113.5 (0.649058)\n",
|
||||
"[4095x4095] => 113.6 (0.738275)\n",
|
||||
"KP07_dimsplit\n",
|
||||
"[63x63] => 49.9 (0.000397)\n",
|
||||
"[127x127] => 71.7 (0.001125)\n",
|
||||
"[191x191] => 76.8 (0.002374)\n",
|
||||
"[255x255] => 77.5 (0.004197)\n",
|
||||
"[319x319] => 79.0 (0.006437)\n",
|
||||
"[383x383] => 79.8 (0.009189)\n",
|
||||
"[447x447] => 80.3 (0.012449)\n",
|
||||
"[511x511] => 80.6 (0.016191)\n",
|
||||
"[639x639] => 81.1 (0.025171)\n",
|
||||
"[767x767] => 81.3 (0.036181)\n",
|
||||
"[895x895] => 74.3 (0.053902)\n",
|
||||
"[1023x1023] => 74.4 (0.070335)\n",
|
||||
"[1151x1151] => 76.2 (0.086896)\n",
|
||||
"[1279x1279] => 74.5 (0.109725)\n",
|
||||
"[1407x1407] => 74.6 (0.132712)\n",
|
||||
"[1535x1535] => 79.4 (0.148342)\n",
|
||||
"[1663x1663] => 78.3 (0.176547)\n",
|
||||
"[1791x1791] => 81.3 (0.197279)\n",
|
||||
"[1919x1919] => 78.5 (0.234550)\n",
|
||||
"[2047x2047] => 82.0 (0.255396)\n",
|
||||
"[2303x2303] => 81.0 (0.327297)\n",
|
||||
"[2559x2559] => 82.0 (0.399197)\n",
|
||||
"[2815x2815] => 82.0 (0.483034)\n",
|
||||
"[3071x3071] => 82.0 (0.574737)\n",
|
||||
"[3327x3327] => 82.1 (0.674395)\n",
|
||||
"[3583x3583] => 82.1 (0.782180)\n",
|
||||
"[3839x3839] => 82.1 (0.897551)\n",
|
||||
"[4095x4095] => 82.1 (1.020911)\n",
|
||||
"WAF\n",
|
||||
"[63x63] => 32.8 (0.000605)\n",
|
||||
"[127x127] => 45.6 (0.001768)\n",
|
||||
"[191x191] => 53.9 (0.003381)\n",
|
||||
"[255x255] => 54.3 (0.005985)\n",
|
||||
"[319x319] => 57.7 (0.008821)\n",
|
||||
"[383x383] => 56.9 (0.012893)\n",
|
||||
"[447x447] => 59.3 (0.016840)\n",
|
||||
"[511x511] => 58.8 (0.022214)\n",
|
||||
"[639x639] => 59.6 (0.034278)\n",
|
||||
"[767x767] => 60.1 (0.048942)\n",
|
||||
"[895x895] => 55.3 (0.072483)\n",
|
||||
"[1023x1023] => 55.4 (0.094402)\n",
|
||||
"[1151x1151] => 55.7 (0.119006)\n",
|
||||
"[1279x1279] => 55.0 (0.148746)\n",
|
||||
"[1407x1407] => 55.8 (0.177399)\n",
|
||||
"[1535x1535] => 58.7 (0.200663)\n",
|
||||
"[1663x1663] => 57.8 (0.239299)\n",
|
||||
"[1791x1791] => 59.6 (0.269144)\n",
|
||||
"[1919x1919] => 61.1 (0.301218)\n",
|
||||
"[2047x2047] => 61.2 (0.342070)\n",
|
||||
"[2303x2303] => 61.3 (0.432280)\n",
|
||||
"[2559x2559] => 61.0 (0.537125)\n",
|
||||
"[2815x2815] => 61.1 (0.648336)\n",
|
||||
"[3071x3071] => 61.3 (0.769734)\n",
|
||||
"[3327x3327] => 61.4 (0.901199)\n",
|
||||
"[3583x3583] => 61.1 (1.049726)\n",
|
||||
"[3839x3839] => 61.3 (1.202961)\n",
|
||||
"[4095x4095] => 61.4 (1.366446)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"run_simulation = True\n",
|
||||
"sizes = list(range(64, 512, 64)) + list(range(512, 2048, 128)) + list(range(2048, 4096, 256)) + [4096]\n",
|
||||
"simulators = [LxF.LxF, FORCE.FORCE, HLL.HLL, HLL2.HLL2, KP07.KP07, KP07_dimsplit.KP07_dimsplit, WAF.WAF]\n",
|
||||
"simulators = [LxF, Force, HLL, HLL2, KP07, KP07Dimsplit, WAF]\n",
|
||||
"if run_simulation:\n",
|
||||
" megacells = {}\n",
|
||||
" for simulator in simulators:\n",
|
||||
@@ -542,18 +283,10 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading from file\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"datafilename = \"megacells.npz\"\n",
|
||||
"if not os.path.isfile(datafilename) and \"megacells\" in globals():\n",
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -31,7 +31,7 @@
|
||||
" from io import StringIO\n",
|
||||
"\n",
|
||||
"from GPUSimulators.common import Timer, DataDumper, ProgressPrinter\n",
|
||||
"from GPUSimulators.EE2D_KP07_dimsplit import EE2D_KP07_dimsplit\n",
|
||||
"from GPUSimulators.model import EE2DKP07Dimsplit\n",
|
||||
"from GPUSimulators.helpers import InitialConditions, Visualization"
|
||||
]
|
||||
},
|
||||
@@ -96,7 +96,7 @@
|
||||
"source": [
|
||||
"def run_simulation(outfile, t_end, sim_args):\n",
|
||||
" with Timer(\"construct\") as t:\n",
|
||||
" sim = EE2D_KP07_dimsplit(**sim_args)\n",
|
||||
" sim = EE2DKP07Dimsplit(**sim_args)\n",
|
||||
" print(\"Constructed in \" + str(t.secs) + \" seconds\")\n",
|
||||
"\n",
|
||||
" #Create a netcdf file and simulate\n",
|
||||
|
||||
8
GPUSimulators/model/__init__.py
Normal file
8
GPUSimulators/model/__init__.py
Normal file
@@ -0,0 +1,8 @@
|
||||
"""Simulator model classes, re-exported at package level.

Lets callers write ``from GPUSimulators.model import HLL2`` instead of
importing each simulator class from its own submodule.
"""

# Explicit relative imports: Python 3 has no implicit relative imports, so a
# bare ``from force import Force`` only resolves if this directory itself
# happens to be on ``sys.path``; importing the package normally would fail.
from .ee2d_kp07_dimsplit import EE2DKP07Dimsplit
from .force import Force
from .hll import HLL
from .hll2 import HLL2
from .kp07 import KP07
from .kp07_dimsplit import KP07Dimsplit
from .lxf import LxF
from .waf import WAF
|
||||
@@ -27,7 +27,7 @@ from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
|
||||
|
||||
|
||||
class EE2D_KP07_dimsplit(BaseSimulator):
|
||||
class EE2DKP07Dimsplit(BaseSimulator):
|
||||
"""
|
||||
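Class that solves the SW equations using the Forward-Backward linear scheme
NOTE(review): this summary does not appear to match the class name (EE2DKP07Dimsplit) -- confirm which equations and numerical scheme this class actually implements and update the description.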
|
||||
"""
|
||||
@@ -29,7 +29,7 @@ from GPUSimulators import Simulator
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
|
||||
class FORCE(Simulator.BaseSimulator):
|
||||
class Force(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations
|
||||
"""
|
||||
@@ -33,7 +33,7 @@ from GPUSimulators.common import ArakawaA2D
|
||||
from GPUSimulators.Simulator import BoundaryCondition
|
||||
|
||||
|
||||
class KP07_dimsplit(Simulator.BaseSimulator):
|
||||
class KP07Dimsplit(Simulator.BaseSimulator):
|
||||
"""
|
||||
Class that solves the SW equations using the dimensionally split KP07 scheme
|
||||
"""
|
||||
@@ -169,16 +169,14 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-06-24T16:05:59.437729Z",
|
||||
"start_time": "2025-06-24T16:05:59.432669Z"
|
||||
}
|
||||
},
|
||||
"metadata": {},
|
||||
"cell_type": "code",
|
||||
"outputs": [],
|
||||
"execution_count": null,
|
||||
"source": [
|
||||
"%%px\n",
|
||||
"\n",
|
||||
"from GPUSimulators.model import EE2DKP07Dimsplit\n",
|
||||
"from GPUSimulators.helpers import InitialConditions\n",
|
||||
"\n",
|
||||
"my_context.autotuner = None\n",
|
||||
@@ -201,27 +199,17 @@
|
||||
"arguments['theta'] = 1.2\n",
|
||||
"arguments['grid'] = grid\n",
|
||||
"\n",
|
||||
"from GPUSimulators import EE2D_KP07_dimsplit\n",
|
||||
"from GPUSimulators.model import ee2d_kp07_dimsplit, hll2\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def gen_sim(grid, **kwargs):\n",
|
||||
" local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)\n",
|
||||
" local_sim = EE2DKP07Dimsplit(**kwargs)\n",
|
||||
" sim = MPISimulator.MPISimulator(local_sim, grid)\n",
|
||||
" return sim\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"outfile = run_simulation(gen_sim, arguments, outfile, save_times, save_var_names)"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"UsageError: Cell magic `%%px` not found.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 1
|
||||
]
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
@@ -271,6 +259,7 @@
|
||||
"source": [
|
||||
"%%px\n",
|
||||
"\n",
|
||||
"from GPUSimulators.model import HLL2\n",
|
||||
"from GPUSimulators.helpers import InitialConditions\n",
|
||||
"from GPUSimulators.Simulator import BoundaryCondition\n",
|
||||
"\n",
|
||||
@@ -307,11 +296,9 @@
|
||||
" 'grid': grid\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"from GPUSimulators import HLL2\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def gen_sim(grid, **kwargs):\n",
|
||||
" local_sim = HLL2.HLL2(**kwargs)\n",
|
||||
" local_sim = HLL2(**kwargs)\n",
|
||||
" sim = MPISimulator.MPISimulator(local_sim, grid)\n",
|
||||
" return sim\n",
|
||||
"\n",
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -36,7 +36,7 @@ import pycuda.driver as cuda
|
||||
from GPUSimulators import MPISimulator
|
||||
from GPUSimulators.common import run_simulation, get_git_hash, get_git_status
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
from GPUSimulators import EE2D_KP07_dimsplit
|
||||
from GPUSimulators.model import EE2DKP07Dimsplit
|
||||
from GPUSimulators.helpers import InitialConditions as IC
|
||||
|
||||
import argparse
|
||||
@@ -137,7 +137,7 @@ logger.info("Running simulation")
|
||||
|
||||
|
||||
def genSim(grid, **kwargs):
|
||||
local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)
|
||||
local_sim = EE2DKP07Dimsplit(**kwargs)
|
||||
sim = MPISimulator.MPISimulator(local_sim, grid)
|
||||
return sim
|
||||
|
||||
|
||||
@@ -25,8 +25,8 @@ import logging
|
||||
|
||||
# Simulator engine etc
|
||||
from GPUSimulators import SHMEMSimulatorGroup
|
||||
from GPUSimulators.common import common, run_simulation
|
||||
from GPUSimulators import EE2D_KP07_dimsplit
|
||||
from GPUSimulators.common import run_simulation
|
||||
from GPUSimulators.model import EE2DKP07Dimsplit
|
||||
from GPUSimulators.helpers import InitialConditions as IC
|
||||
|
||||
####
|
||||
@@ -83,7 +83,7 @@ for i in range(grid.ngpus):
|
||||
arguments['context'] = grid.cuda_contexts[i]
|
||||
arguments['theta'] = 1.2
|
||||
|
||||
sims.append(EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**arguments))
|
||||
sims.append(EE2DKP07Dimsplit(**arguments))
|
||||
# sims[i] = SHMEMSimulator(i, local_sim, grid) # 1st attempt: no wrapper (per sim)
|
||||
|
||||
arguments['sims'] = sims
|
||||
|
||||
@@ -29,7 +29,7 @@ import pycuda.driver as cuda
|
||||
# Simulator engine etc
|
||||
from GPUSimulators.common import run_simulation
|
||||
from GPUSimulators.gpu import CudaContext
|
||||
from GPUSimulators import EE2D_KP07_dimsplit
|
||||
from GPUSimulators.model import EE2DKP07Dimsplit
|
||||
from GPUSimulators.helpers import InitialConditions as IC
|
||||
|
||||
import argparse
|
||||
@@ -96,7 +96,7 @@ logger.info("Running simulation")
|
||||
|
||||
|
||||
def gen_sim(**kwargs):
|
||||
local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)
|
||||
local_sim = EE2DKP07Dimsplit(**kwargs)
|
||||
return local_sim
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user