refactor(autotuner): move models to a separate package

This commit is contained in:
Anthony Berg 2025-06-24 18:53:33 +02:00
parent ae9d2b1595
commit 3b424d1f5a
19 changed files with 226 additions and 990 deletions

View File

@ -45,23 +45,22 @@
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"from GPUSimulators import LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF, Autotuner\n",
"from GPUSimulators import Autotuner\n",
"from GPUSimulators.model import Force, HLL, HLL2, KP07, LxF, WAF, KP07Dimsplit\n",
"from GPUSimulators.common import Timer"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"cell_type": "code",
"outputs": [],
"source": [
"%setup_logging --out autotuning.log --name=autotuning"
]
"execution_count": null,
"source": "%setup_logging --out autotuning.log --name=autotuning"
},
{
"cell_type": "code",
@ -111,28 +110,12 @@
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "All-NaN slice encountered",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mValueError\u001B[0m Traceback (most recent call last)",
"Cell \u001B[0;32mIn[9], line 2\u001B[0m\n\u001B[1;32m 1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [autotuner\u001B[38;5;241m.\u001B[39mget_peak_performance(simulator) \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m 3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m 4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n",
"Cell \u001B[0;32mIn[9], line 2\u001B[0m, in \u001B[0;36m<listcomp>\u001B[0;34m(.0)\u001B[0m\n\u001B[1;32m 1\u001B[0m simulators \u001B[38;5;241m=\u001B[39m [LxF\u001B[38;5;241m.\u001B[39mLxF, FORCE\u001B[38;5;241m.\u001B[39mFORCE, HLL\u001B[38;5;241m.\u001B[39mHLL, HLL2\u001B[38;5;241m.\u001B[39mHLL2, KP07\u001B[38;5;241m.\u001B[39mKP07, KP07_dimsplit\u001B[38;5;241m.\u001B[39mKP07_dimsplit, WAF\u001B[38;5;241m.\u001B[39mWAF]\n\u001B[0;32m----> 2\u001B[0m peak_performance \u001B[38;5;241m=\u001B[39m [\u001B[43mautotuner\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mget_peak_performance\u001B[49m\u001B[43m(\u001B[49m\u001B[43msimulator\u001B[49m\u001B[43m)\u001B[49m \u001B[38;5;28;01mfor\u001B[39;00m simulator \u001B[38;5;129;01min\u001B[39;00m simulators]\n\u001B[1;32m 3\u001B[0m megacells \u001B[38;5;241m=\u001B[39m [performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m'\u001B[39m] \u001B[38;5;28;01mfor\u001B[39;00m performance \u001B[38;5;129;01min\u001B[39;00m peak_performance]\n\u001B[1;32m 4\u001B[0m xlabels \u001B[38;5;241m=\u001B[39m [\u001B[38;5;124m\"\u001B[39m\u001B[38;5;132;01m{:s}\u001B[39;00m\u001B[38;5;130;01m\\n\u001B[39;00m\u001B[38;5;124m[\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124mx\u001B[39m\u001B[38;5;132;01m{:d}\u001B[39;00m\u001B[38;5;124m]\u001B[39m\u001B[38;5;124m\"\u001B[39m\u001B[38;5;241m.\u001B[39mformat(simulators[i]\u001B[38;5;241m.\u001B[39m\u001B[38;5;18m__name__\u001B[39m, performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m'\u001B[39m], performance[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m'\u001B[39m]) \u001B[38;5;28;01mfor\u001B[39;00m i, performance \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28menumerate\u001B[39m(peak_performance)]\n",
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:132\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance\u001B[0;34m(self, simulator)\u001B[0m\n\u001B[1;32m 130\u001B[0m block_widths \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_widths\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[1;32m 131\u001B[0m block_heights \u001B[38;5;241m=\u001B[39m data[key \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m_block_heights\u001B[39m\u001B[38;5;124m'\u001B[39m]\n\u001B[0;32m--> 132\u001B[0m j, i \u001B[38;5;241m=\u001B[39m \u001B[43mfind_max_index\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 134\u001B[0m \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key] \u001B[38;5;241m=\u001B[39m { \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_width\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_widths[i],\n\u001B[1;32m 135\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mblock_height\u001B[39m\u001B[38;5;124m\"\u001B[39m: block_heights[j],\n\u001B[1;32m 136\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mmegacells\u001B[39m\u001B[38;5;124m\"\u001B[39m: megacells[j, i] }\n\u001B[1;32m 137\u001B[0m logger\u001B[38;5;241m.\u001B[39mdebug(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mReturning \u001B[39m\u001B[38;5;132;01m%s\u001B[39;00m\u001B[38;5;124m as peak performance parameters\u001B[39m\u001B[38;5;124m\"\u001B[39m, \u001B[38;5;28mself\u001B[39m\u001B[38;5;241m.\u001B[39mperformance[key])\n",
"File \u001B[0;32m~/PycharmProjects/FiniteVolumeGPU/GPUSimulators/Autotuner.py:126\u001B[0m, in \u001B[0;36mAutotuner.get_peak_performance.<locals>.find_max_index\u001B[0;34m(megacells)\u001B[0m\n\u001B[1;32m 125\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfind_max_index\u001B[39m(megacells):\n\u001B[0;32m--> 126\u001B[0m max_index \u001B[38;5;241m=\u001B[39m \u001B[43mnp\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mnanargmax\u001B[49m\u001B[43m(\u001B[49m\u001B[43mmegacells\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 127\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m np\u001B[38;5;241m.\u001B[39munravel_index(max_index, megacells\u001B[38;5;241m.\u001B[39mshape)\n",
"File \u001B[0;32m~/.conda/envs/ShallowWaterGPU/lib/python3.9/site-packages/numpy/lib/nanfunctions.py:613\u001B[0m, in \u001B[0;36mnanargmax\u001B[0;34m(a, axis, out, keepdims)\u001B[0m\n\u001B[1;32m 611\u001B[0m mask \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39mall(mask, axis\u001B[38;5;241m=\u001B[39maxis)\n\u001B[1;32m 612\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m np\u001B[38;5;241m.\u001B[39many(mask):\n\u001B[0;32m--> 613\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[38;5;124m\"\u001B[39m\u001B[38;5;124mAll-NaN slice encountered\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[1;32m 614\u001B[0m res \u001B[38;5;241m=\u001B[39m np\u001B[38;5;241m.\u001B[39margmax(a, axis\u001B[38;5;241m=\u001B[39maxis, out\u001B[38;5;241m=\u001B[39mout, keepdims\u001B[38;5;241m=\u001B[39mkeepdims)\n\u001B[1;32m 615\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m res\n",
"\u001B[0;31mValueError\u001B[0m: All-NaN slice encountered"
]
}
],
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"simulators = [LxF.LxF, FORCE.FORCE, HLL.HLL, HLL2.HLL2, KP07.KP07, KP07_dimsplit.KP07_dimsplit, WAF.WAF]\n",
"simulators = [LxF, Force, HLL, HLL2, KP07, KP07Dimsplit, WAF]\n",
"peak_performance = [autotuner.get_peak_performance(simulator) for simulator in simulators]\n",
"megacells = [performance['megacells'] for performance in peak_performance]\n",
"xlabels = [f\"{simulators[i].__name__}\\n[{performance['block_width']}x{performance['block_height']}]\" for i, performance\n",
@ -146,28 +129,11 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Registering my_context in user workspace\n",
"PyCUDA version 2017.1.1\n",
"CUDA version (9, 1, 0)\n",
"Driver version 9010\n",
"Using 'GeForce 840M' GPU\n",
"Created context handle <694827722560>\n",
"Using CUDA cache dir c:\\Users\\anbro\\Documents\\projects\\ShallowWaterGPU\\GPUSimulators\\cuda_cache\n",
"Autotuning enabled. It may take several minutes to run the code the first time: have patience\n"
]
}
],
"source": [
"%cuda_context_handler my_context"
]
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": "%cuda_context_handler my_context"
},
{
"cell_type": "code",
@ -258,239 +224,14 @@
]
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LxF\n",
"[63x63] => 107.3 (0.000185)\n",
"[127x127] => 165.6 (0.000487)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\anbro\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in sqrt\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[191x191] => 183.4 (0.000995)\n",
"[255x255] => 180.0 (0.001806)\n",
"[319x319] => 185.8 (0.002738)\n",
"[383x383] => 187.3 (0.003915)\n",
"[447x447] => 189.7 (0.005266)\n",
"[511x511] => 191.8 (0.006806)\n",
"[639x639] => 193.6 (0.010548)\n",
"[767x767] => 193.7 (0.015182)\n",
"[895x895] => 195.6 (0.020481)\n",
"[1023x1023] => 195.0 (0.026839)\n",
"[1151x1151] => 195.8 (0.033822)\n",
"[1279x1279] => 196.1 (0.041711)\n",
"[1407x1407] => 196.2 (0.050439)\n",
"[1535x1535] => 196.4 (0.059986)\n",
"[1663x1663] => 196.6 (0.070330)\n",
"[1791x1791] => 196.7 (0.081546)\n",
"[1919x1919] => 196.9 (0.093511)\n",
"[2047x2047] => 202.9 (0.103257)\n",
"[2303x2303] => 210.7 (0.125838)\n",
"[2559x2559] => 208.0 (0.157417)\n",
"[2815x2815] => 211.6 (0.187229)\n",
"[3071x3071] => 208.7 (0.225954)\n",
"[3327x3327] => 214.2 (0.258395)\n",
"[3583x3583] => 214.2 (0.299629)\n",
"[3839x3839] => 214.2 (0.343982)\n",
"[4095x4095] => 214.9 (0.390088)\n",
"FORCE\n",
"[63x63] => 94.3 (0.000210)\n",
"[127x127] => 136.5 (0.000591)\n",
"[191x191] => 147.0 (0.001241)\n",
"[255x255] => 148.5 (0.002189)\n",
"[319x319] => 151.6 (0.003357)\n",
"[383x383] => 153.0 (0.004793)\n",
"[447x447] => 153.9 (0.006494)\n",
"[511x511] => 155.0 (0.008421)\n",
"[639x639] => 156.4 (0.013056)\n",
"[767x767] => 156.5 (0.018790)\n",
"[895x895] => 157.0 (0.025514)\n",
"[1023x1023] => 143.6 (0.036450)\n",
"[1151x1151] => 143.6 (0.046115)\n",
"[1279x1279] => 143.8 (0.056865)\n",
"[1407x1407] => 143.9 (0.068797)\n",
"[1535x1535] => 144.0 (0.081832)\n",
"[1663x1663] => 144.0 (0.096007)\n",
"[1791x1791] => 144.0 (0.111343)\n",
"[1919x1919] => 144.2 (0.127712)\n",
"[2047x2047] => 151.7 (0.138153)\n",
"[2303x2303] => 147.3 (0.180021)\n",
"[2559x2559] => 154.3 (0.212248)\n",
"[2815x2815] => 158.3 (0.250279)\n",
"[3071x3071] => 156.9 (0.300547)\n",
"[3327x3327] => 158.4 (0.349353)\n",
"[3583x3583] => 158.4 (0.405175)\n",
"[3839x3839] => 158.4 (0.465201)\n",
"[4095x4095] => 158.4 (0.529337)\n",
"HLL\n",
"[63x63] => 65.7 (0.000302)\n",
"[127x127] => 98.6 (0.000818)\n",
"[191x191] => 108.1 (0.001688)\n",
"[255x255] => 109.2 (0.002977)\n",
"[319x319] => 111.9 (0.004546)\n",
"[383x383] => 113.2 (0.006482)\n",
"[447x447] => 113.7 (0.008785)\n",
"[511x511] => 114.4 (0.011411)\n",
"[639x639] => 115.3 (0.017713)\n",
"[767x767] => 115.6 (0.025454)\n",
"[895x895] => 105.7 (0.037888)\n",
"[1023x1023] => 105.8 (0.049473)\n",
"[1151x1151] => 105.9 (0.062558)\n",
"[1279x1279] => 106.0 (0.077148)\n",
"[1407x1407] => 106.1 (0.093290)\n",
"[1535x1535] => 109.8 (0.107271)\n",
"[1663x1663] => 106.2 (0.130195)\n",
"[1791x1791] => 107.7 (0.148973)\n",
"[1919x1919] => 115.0 (0.160104)\n",
"[2047x2047] => 113.3 (0.184913)\n",
"[2303x2303] => 111.9 (0.236908)\n",
"[2559x2559] => 116.6 (0.280840)\n",
"[2815x2815] => 116.6 (0.339777)\n",
"[3071x3071] => 116.6 (0.404268)\n",
"[3327x3327] => 116.6 (0.474572)\n",
"[3583x3583] => 116.7 (0.550240)\n",
"[3839x3839] => 116.7 (0.631563)\n",
"[4095x4095] => 116.7 (0.718161)\n",
"HLL2\n",
"[63x63] => 44.2 (0.000449)\n",
"[127x127] => 63.0 (0.001280)\n",
"[191x191] => 68.4 (0.002666)\n",
"[255x255] => 69.2 (0.004698)\n",
"[319x319] => 70.6 (0.007204)\n",
"[383x383] => 71.1 (0.010314)\n",
"[447x447] => 71.6 (0.013956)\n",
"[511x511] => 72.0 (0.018146)\n",
"[639x639] => 72.4 (0.028204)\n",
"[767x767] => 72.5 (0.040545)\n",
"[895x895] => 72.8 (0.055047)\n",
"[1023x1023] => 72.8 (0.071828)\n",
"[1151x1151] => 66.5 (0.099652)\n",
"[1279x1279] => 69.8 (0.117195)\n",
"[1407x1407] => 67.0 (0.147833)\n",
"[1535x1535] => 71.3 (0.165185)\n",
"[1663x1663] => 71.2 (0.194123)\n",
"[1791x1791] => 72.1 (0.222351)\n",
"[1919x1919] => 70.3 (0.261847)\n",
"[2047x2047] => 73.2 (0.286228)\n",
"[2303x2303] => 72.0 (0.368479)\n",
"[2559x2559] => 73.2 (0.447096)\n",
"[2815x2815] => 73.2 (0.541084)\n",
"[3071x3071] => 73.2 (0.643925)\n",
"[3327x3327] => 73.2 (0.755588)\n",
"[3583x3583] => 73.3 (0.876222)\n",
"[3839x3839] => 73.3 (1.005958)\n",
"[4095x4095] => 73.3 (1.144158)\n",
"KP07\n",
"[63x63] => 69.9 (0.000284)\n",
"[127x127] => 95.0 (0.000849)\n",
"[191x191] => 101.7 (0.001794)\n",
"[255x255] => 101.3 (0.003209)\n",
"[319x319] => 106.9 (0.004760)\n",
"[383x383] => 107.1 (0.006850)\n",
"[447x447] => 109.2 (0.009150)\n",
"[511x511] => 108.0 (0.012088)\n",
"[639x639] => 111.6 (0.018295)\n",
"[767x767] => 111.6 (0.026361)\n",
"[895x895] => 102.4 (0.039123)\n",
"[1023x1023] => 102.2 (0.051186)\n",
"[1151x1151] => 102.3 (0.064764)\n",
"[1279x1279] => 103.4 (0.079074)\n",
"[1407x1407] => 103.2 (0.095876)\n",
"[1535x1535] => 106.3 (0.110860)\n",
"[1663x1663] => 103.1 (0.134182)\n",
"[1791x1791] => 107.7 (0.148853)\n",
"[1919x1919] => 105.5 (0.174575)\n",
"[2047x2047] => 111.4 (0.188084)\n",
"[2303x2303] => 113.5 (0.233650)\n",
"[2559x2559] => 114.0 (0.287327)\n",
"[2815x2815] => 113.7 (0.348536)\n",
"[3071x3071] => 113.2 (0.416533)\n",
"[3327x3327] => 113.7 (0.486893)\n",
"[3583x3583] => 113.5 (0.565573)\n",
"[3839x3839] => 113.5 (0.649058)\n",
"[4095x4095] => 113.6 (0.738275)\n",
"KP07_dimsplit\n",
"[63x63] => 49.9 (0.000397)\n",
"[127x127] => 71.7 (0.001125)\n",
"[191x191] => 76.8 (0.002374)\n",
"[255x255] => 77.5 (0.004197)\n",
"[319x319] => 79.0 (0.006437)\n",
"[383x383] => 79.8 (0.009189)\n",
"[447x447] => 80.3 (0.012449)\n",
"[511x511] => 80.6 (0.016191)\n",
"[639x639] => 81.1 (0.025171)\n",
"[767x767] => 81.3 (0.036181)\n",
"[895x895] => 74.3 (0.053902)\n",
"[1023x1023] => 74.4 (0.070335)\n",
"[1151x1151] => 76.2 (0.086896)\n",
"[1279x1279] => 74.5 (0.109725)\n",
"[1407x1407] => 74.6 (0.132712)\n",
"[1535x1535] => 79.4 (0.148342)\n",
"[1663x1663] => 78.3 (0.176547)\n",
"[1791x1791] => 81.3 (0.197279)\n",
"[1919x1919] => 78.5 (0.234550)\n",
"[2047x2047] => 82.0 (0.255396)\n",
"[2303x2303] => 81.0 (0.327297)\n",
"[2559x2559] => 82.0 (0.399197)\n",
"[2815x2815] => 82.0 (0.483034)\n",
"[3071x3071] => 82.0 (0.574737)\n",
"[3327x3327] => 82.1 (0.674395)\n",
"[3583x3583] => 82.1 (0.782180)\n",
"[3839x3839] => 82.1 (0.897551)\n",
"[4095x4095] => 82.1 (1.020911)\n",
"WAF\n",
"[63x63] => 32.8 (0.000605)\n",
"[127x127] => 45.6 (0.001768)\n",
"[191x191] => 53.9 (0.003381)\n",
"[255x255] => 54.3 (0.005985)\n",
"[319x319] => 57.7 (0.008821)\n",
"[383x383] => 56.9 (0.012893)\n",
"[447x447] => 59.3 (0.016840)\n",
"[511x511] => 58.8 (0.022214)\n",
"[639x639] => 59.6 (0.034278)\n",
"[767x767] => 60.1 (0.048942)\n",
"[895x895] => 55.3 (0.072483)\n",
"[1023x1023] => 55.4 (0.094402)\n",
"[1151x1151] => 55.7 (0.119006)\n",
"[1279x1279] => 55.0 (0.148746)\n",
"[1407x1407] => 55.8 (0.177399)\n",
"[1535x1535] => 58.7 (0.200663)\n",
"[1663x1663] => 57.8 (0.239299)\n",
"[1791x1791] => 59.6 (0.269144)\n",
"[1919x1919] => 61.1 (0.301218)\n",
"[2047x2047] => 61.2 (0.342070)\n",
"[2303x2303] => 61.3 (0.432280)\n",
"[2559x2559] => 61.0 (0.537125)\n",
"[2815x2815] => 61.1 (0.648336)\n",
"[3071x3071] => 61.3 (0.769734)\n",
"[3327x3327] => 61.4 (0.901199)\n",
"[3583x3583] => 61.1 (1.049726)\n",
"[3839x3839] => 61.3 (1.202961)\n",
"[4095x4095] => 61.4 (1.366446)\n"
]
}
],
"source": [
"run_simulation = True\n",
"sizes = list(range(64, 512, 64)) + list(range(512, 2048, 128)) + list(range(2048, 4096, 256)) + [4096]\n",
"simulators = [LxF.LxF, FORCE.FORCE, HLL.HLL, HLL2.HLL2, KP07.KP07, KP07_dimsplit.KP07_dimsplit, WAF.WAF]\n",
"simulators = [LxF, Force, HLL, HLL2, KP07, KP07Dimsplit, WAF]\n",
"if run_simulation:\n",
" megacells = {}\n",
" for simulator in simulators:\n",
@ -542,18 +283,10 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading from file\n"
]
}
],
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"datafilename = \"megacells.npz\"\n",
"if not os.path.isfile(datafilename) and \"megacells\" in globals():\n",

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -31,7 +31,7 @@
" from io import StringIO\n",
"\n",
"from GPUSimulators.common import Timer, DataDumper, ProgressPrinter\n",
"from GPUSimulators.EE2D_KP07_dimsplit import EE2D_KP07_dimsplit\n",
"from GPUSimulators.model import EE2DKP07Dimsplit\n",
"from GPUSimulators.helpers import InitialConditions, Visualization"
]
},
@ -96,7 +96,7 @@
"source": [
"def run_simulation(outfile, t_end, sim_args):\n",
" with Timer(\"construct\") as t:\n",
" sim = EE2D_KP07_dimsplit(**sim_args)\n",
" sim = EE2DKP07Dimsplit(**sim_args)\n",
" print(\"Constructed in \" + str(t.secs) + \" seconds\")\n",
"\n",
" #Create a netcdf file and simulate\n",

View File

@ -0,0 +1,8 @@
"""Re-export the simulator classes so they can be imported from this package.

Callers use e.g. ``from GPUSimulators.model import HLL2, KP07Dimsplit``.
"""

# Explicit relative imports are required here: Python 3 has no implicit
# relative imports, so a bare ``from force import Force`` would be looked up
# as a top-level module and fail when this package is imported.
from .ee2d_kp07_dimsplit import EE2DKP07Dimsplit
from .force import Force
from .hll import HLL
from .hll2 import HLL2
from .kp07 import KP07
from .kp07_dimsplit import KP07Dimsplit
from .lxf import LxF
from .waf import WAF

View File

@ -27,7 +27,7 @@ from GPUSimulators.common import ArakawaA2D
from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
class EE2D_KP07_dimsplit(BaseSimulator):
class EE2DKP07Dimsplit(BaseSimulator):
"""
Class that solves the SW equations using the Forward-Backward linear scheme
"""

View File

@ -29,7 +29,7 @@ from GPUSimulators import Simulator
from GPUSimulators.Simulator import BoundaryCondition
class FORCE(Simulator.BaseSimulator):
class Force(Simulator.BaseSimulator):
"""
Class that solves the SW equations
"""

View File

@ -33,7 +33,7 @@ from GPUSimulators.common import ArakawaA2D
from GPUSimulators.Simulator import BoundaryCondition
class KP07_dimsplit(Simulator.BaseSimulator):
class KP07Dimsplit(Simulator.BaseSimulator):
"""
Class that solves the SW equations using the dimensionally split KP07 scheme
"""

View File

@ -169,16 +169,14 @@
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-24T16:05:59.437729Z",
"start_time": "2025-06-24T16:05:59.432669Z"
}
},
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"%%px\n",
"\n",
"from GPUSimulators.model import EE2DKP07Dimsplit\n",
"from GPUSimulators.helpers import InitialConditions\n",
"\n",
"my_context.autotuner = None\n",
@ -201,27 +199,17 @@
"arguments['theta'] = 1.2\n",
"arguments['grid'] = grid\n",
"\n",
"from GPUSimulators import EE2D_KP07_dimsplit\n",
"from GPUSimulators.model import ee2d_kp07_dimsplit, hll2\n",
"\n",
"\n",
"def gen_sim(grid, **kwargs):\n",
" local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)\n",
" local_sim = EE2DKP07Dimsplit(**kwargs)\n",
" sim = MPISimulator.MPISimulator(local_sim, grid)\n",
" return sim\n",
"\n",
"\n",
"outfile = run_simulation(gen_sim, arguments, outfile, save_times, save_var_names)"
],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"UsageError: Cell magic `%%px` not found.\n"
]
}
],
"execution_count": 1
]
},
{
"metadata": {},
@ -271,6 +259,7 @@
"source": [
"%%px\n",
"\n",
"from GPUSimulators.model import HLL2\n",
"from GPUSimulators.helpers import InitialConditions\n",
"from GPUSimulators.Simulator import BoundaryCondition\n",
"\n",
@ -307,11 +296,9 @@
" 'grid': grid\n",
"}\n",
"\n",
"from GPUSimulators import HLL2\n",
"\n",
"\n",
"def gen_sim(grid, **kwargs):\n",
" local_sim = HLL2.HLL2(**kwargs)\n",
" local_sim = HLL2(**kwargs)\n",
" sim = MPISimulator.MPISimulator(local_sim, grid)\n",
" return sim\n",
"\n",

File diff suppressed because one or more lines are too long

View File

@ -36,7 +36,7 @@ import pycuda.driver as cuda
from GPUSimulators import MPISimulator
from GPUSimulators.common import run_simulation, get_git_hash, get_git_status
from GPUSimulators.gpu import CudaContext
from GPUSimulators import EE2D_KP07_dimsplit
from GPUSimulators.model import EE2DKP07Dimsplit
from GPUSimulators.helpers import InitialConditions as IC
import argparse
@ -137,7 +137,7 @@ logger.info("Running simulation")
def genSim(grid, **kwargs):
local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)
local_sim = EE2DKP07Dimsplit(**kwargs)
sim = MPISimulator.MPISimulator(local_sim, grid)
return sim

View File

@ -25,8 +25,8 @@ import logging
# Simulator engine etc
from GPUSimulators import SHMEMSimulatorGroup
from GPUSimulators.common import common, run_simulation
from GPUSimulators import EE2D_KP07_dimsplit
from GPUSimulators.common import run_simulation
from GPUSimulators.model import EE2DKP07Dimsplit
from GPUSimulators.helpers import InitialConditions as IC
####
@ -83,7 +83,7 @@ for i in range(grid.ngpus):
arguments['context'] = grid.cuda_contexts[i]
arguments['theta'] = 1.2
sims.append(EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**arguments))
sims.append(EE2DKP07Dimsplit(**arguments))
# sims[i] = SHMEMSimulator(i, local_sim, grid) # 1st attempt: no wrapper (per sim)
arguments['sims'] = sims

View File

@ -29,7 +29,7 @@ import pycuda.driver as cuda
# Simulator engine etc
from GPUSimulators.common import run_simulation
from GPUSimulators.gpu import CudaContext
from GPUSimulators import EE2D_KP07_dimsplit
from GPUSimulators.model import EE2DKP07Dimsplit
from GPUSimulators.helpers import InitialConditions as IC
import argparse
@ -96,7 +96,7 @@ logger.info("Running simulation")
def gen_sim(**kwargs):
local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)
local_sim = EE2DKP07Dimsplit(**kwargs)
return local_sim