mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-11-29 17:28:03 +01:00
Refactoring CudaArray and ArakawaA grid
This commit is contained in:
263
Autotuning.ipynb
263
Autotuning.ipynb
@@ -228,7 +228,7 @@
|
|||||||
"CUDA version (9, 1, 0)\n",
|
"CUDA version (9, 1, 0)\n",
|
||||||
"Driver version 9010\n",
|
"Driver version 9010\n",
|
||||||
"Using 'GeForce 840M' GPU\n",
|
"Using 'GeForce 840M' GPU\n",
|
||||||
"Created context handle <879048629408>\n",
|
"Created context handle <694827722560>\n",
|
||||||
"Using CUDA cache dir c:\\Users\\anbro\\Documents\\projects\\ShallowWaterGPU\\GPUSimulators\\cuda_cache\n",
|
"Using CUDA cache dir c:\\Users\\anbro\\Documents\\projects\\ShallowWaterGPU\\GPUSimulators\\cuda_cache\n",
|
||||||
"Autotuning enabled. It may take several minutes to run the code the first time: have patience\n"
|
"Autotuning enabled. It may take several minutes to run the code the first time: have patience\n"
|
||||||
]
|
]
|
||||||
@@ -247,13 +247,13 @@
|
|||||||
"name": "stderr",
|
"name": "stderr",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"gen_data: 3115.227938 ms\n"
|
"gen_data: 1647.211552 ms\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"<matplotlib.image.AxesImage at 0xccab2d4c18>"
|
"<matplotlib.image.AxesImage at 0xa1c91aa390>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 8,
|
"execution_count": 8,
|
||||||
@@ -328,14 +328,238 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 9,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"scrolled": false
|
"scrolled": false
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"LxF\n",
|
||||||
|
"[63x63] => 107.3 (0.000185)\n",
|
||||||
|
"[127x127] => 165.6 (0.000487)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"C:\\Users\\anbro\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in sqrt\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"[191x191] => 183.4 (0.000995)\n",
|
||||||
|
"[255x255] => 180.0 (0.001806)\n",
|
||||||
|
"[319x319] => 185.8 (0.002738)\n",
|
||||||
|
"[383x383] => 187.3 (0.003915)\n",
|
||||||
|
"[447x447] => 189.7 (0.005266)\n",
|
||||||
|
"[511x511] => 191.8 (0.006806)\n",
|
||||||
|
"[639x639] => 193.6 (0.010548)\n",
|
||||||
|
"[767x767] => 193.7 (0.015182)\n",
|
||||||
|
"[895x895] => 195.6 (0.020481)\n",
|
||||||
|
"[1023x1023] => 195.0 (0.026839)\n",
|
||||||
|
"[1151x1151] => 195.8 (0.033822)\n",
|
||||||
|
"[1279x1279] => 196.1 (0.041711)\n",
|
||||||
|
"[1407x1407] => 196.2 (0.050439)\n",
|
||||||
|
"[1535x1535] => 196.4 (0.059986)\n",
|
||||||
|
"[1663x1663] => 196.6 (0.070330)\n",
|
||||||
|
"[1791x1791] => 196.7 (0.081546)\n",
|
||||||
|
"[1919x1919] => 196.9 (0.093511)\n",
|
||||||
|
"[2047x2047] => 202.9 (0.103257)\n",
|
||||||
|
"[2303x2303] => 210.7 (0.125838)\n",
|
||||||
|
"[2559x2559] => 208.0 (0.157417)\n",
|
||||||
|
"[2815x2815] => 211.6 (0.187229)\n",
|
||||||
|
"[3071x3071] => 208.7 (0.225954)\n",
|
||||||
|
"[3327x3327] => 214.2 (0.258395)\n",
|
||||||
|
"[3583x3583] => 214.2 (0.299629)\n",
|
||||||
|
"[3839x3839] => 214.2 (0.343982)\n",
|
||||||
|
"[4095x4095] => 214.9 (0.390088)\n",
|
||||||
|
"FORCE\n",
|
||||||
|
"[63x63] => 94.3 (0.000210)\n",
|
||||||
|
"[127x127] => 136.5 (0.000591)\n",
|
||||||
|
"[191x191] => 147.0 (0.001241)\n",
|
||||||
|
"[255x255] => 148.5 (0.002189)\n",
|
||||||
|
"[319x319] => 151.6 (0.003357)\n",
|
||||||
|
"[383x383] => 153.0 (0.004793)\n",
|
||||||
|
"[447x447] => 153.9 (0.006494)\n",
|
||||||
|
"[511x511] => 155.0 (0.008421)\n",
|
||||||
|
"[639x639] => 156.4 (0.013056)\n",
|
||||||
|
"[767x767] => 156.5 (0.018790)\n",
|
||||||
|
"[895x895] => 157.0 (0.025514)\n",
|
||||||
|
"[1023x1023] => 143.6 (0.036450)\n",
|
||||||
|
"[1151x1151] => 143.6 (0.046115)\n",
|
||||||
|
"[1279x1279] => 143.8 (0.056865)\n",
|
||||||
|
"[1407x1407] => 143.9 (0.068797)\n",
|
||||||
|
"[1535x1535] => 144.0 (0.081832)\n",
|
||||||
|
"[1663x1663] => 144.0 (0.096007)\n",
|
||||||
|
"[1791x1791] => 144.0 (0.111343)\n",
|
||||||
|
"[1919x1919] => 144.2 (0.127712)\n",
|
||||||
|
"[2047x2047] => 151.7 (0.138153)\n",
|
||||||
|
"[2303x2303] => 147.3 (0.180021)\n",
|
||||||
|
"[2559x2559] => 154.3 (0.212248)\n",
|
||||||
|
"[2815x2815] => 158.3 (0.250279)\n",
|
||||||
|
"[3071x3071] => 156.9 (0.300547)\n",
|
||||||
|
"[3327x3327] => 158.4 (0.349353)\n",
|
||||||
|
"[3583x3583] => 158.4 (0.405175)\n",
|
||||||
|
"[3839x3839] => 158.4 (0.465201)\n",
|
||||||
|
"[4095x4095] => 158.4 (0.529337)\n",
|
||||||
|
"HLL\n",
|
||||||
|
"[63x63] => 65.7 (0.000302)\n",
|
||||||
|
"[127x127] => 98.6 (0.000818)\n",
|
||||||
|
"[191x191] => 108.1 (0.001688)\n",
|
||||||
|
"[255x255] => 109.2 (0.002977)\n",
|
||||||
|
"[319x319] => 111.9 (0.004546)\n",
|
||||||
|
"[383x383] => 113.2 (0.006482)\n",
|
||||||
|
"[447x447] => 113.7 (0.008785)\n",
|
||||||
|
"[511x511] => 114.4 (0.011411)\n",
|
||||||
|
"[639x639] => 115.3 (0.017713)\n",
|
||||||
|
"[767x767] => 115.6 (0.025454)\n",
|
||||||
|
"[895x895] => 105.7 (0.037888)\n",
|
||||||
|
"[1023x1023] => 105.8 (0.049473)\n",
|
||||||
|
"[1151x1151] => 105.9 (0.062558)\n",
|
||||||
|
"[1279x1279] => 106.0 (0.077148)\n",
|
||||||
|
"[1407x1407] => 106.1 (0.093290)\n",
|
||||||
|
"[1535x1535] => 109.8 (0.107271)\n",
|
||||||
|
"[1663x1663] => 106.2 (0.130195)\n",
|
||||||
|
"[1791x1791] => 107.7 (0.148973)\n",
|
||||||
|
"[1919x1919] => 115.0 (0.160104)\n",
|
||||||
|
"[2047x2047] => 113.3 (0.184913)\n",
|
||||||
|
"[2303x2303] => 111.9 (0.236908)\n",
|
||||||
|
"[2559x2559] => 116.6 (0.280840)\n",
|
||||||
|
"[2815x2815] => 116.6 (0.339777)\n",
|
||||||
|
"[3071x3071] => 116.6 (0.404268)\n",
|
||||||
|
"[3327x3327] => 116.6 (0.474572)\n",
|
||||||
|
"[3583x3583] => 116.7 (0.550240)\n",
|
||||||
|
"[3839x3839] => 116.7 (0.631563)\n",
|
||||||
|
"[4095x4095] => 116.7 (0.718161)\n",
|
||||||
|
"HLL2\n",
|
||||||
|
"[63x63] => 44.2 (0.000449)\n",
|
||||||
|
"[127x127] => 63.0 (0.001280)\n",
|
||||||
|
"[191x191] => 68.4 (0.002666)\n",
|
||||||
|
"[255x255] => 69.2 (0.004698)\n",
|
||||||
|
"[319x319] => 70.6 (0.007204)\n",
|
||||||
|
"[383x383] => 71.1 (0.010314)\n",
|
||||||
|
"[447x447] => 71.6 (0.013956)\n",
|
||||||
|
"[511x511] => 72.0 (0.018146)\n",
|
||||||
|
"[639x639] => 72.4 (0.028204)\n",
|
||||||
|
"[767x767] => 72.5 (0.040545)\n",
|
||||||
|
"[895x895] => 72.8 (0.055047)\n",
|
||||||
|
"[1023x1023] => 72.8 (0.071828)\n",
|
||||||
|
"[1151x1151] => 66.5 (0.099652)\n",
|
||||||
|
"[1279x1279] => 69.8 (0.117195)\n",
|
||||||
|
"[1407x1407] => 67.0 (0.147833)\n",
|
||||||
|
"[1535x1535] => 71.3 (0.165185)\n",
|
||||||
|
"[1663x1663] => 71.2 (0.194123)\n",
|
||||||
|
"[1791x1791] => 72.1 (0.222351)\n",
|
||||||
|
"[1919x1919] => 70.3 (0.261847)\n",
|
||||||
|
"[2047x2047] => 73.2 (0.286228)\n",
|
||||||
|
"[2303x2303] => 72.0 (0.368479)\n",
|
||||||
|
"[2559x2559] => 73.2 (0.447096)\n",
|
||||||
|
"[2815x2815] => 73.2 (0.541084)\n",
|
||||||
|
"[3071x3071] => 73.2 (0.643925)\n",
|
||||||
|
"[3327x3327] => 73.2 (0.755588)\n",
|
||||||
|
"[3583x3583] => 73.3 (0.876222)\n",
|
||||||
|
"[3839x3839] => 73.3 (1.005958)\n",
|
||||||
|
"[4095x4095] => 73.3 (1.144158)\n",
|
||||||
|
"KP07\n",
|
||||||
|
"[63x63] => 69.9 (0.000284)\n",
|
||||||
|
"[127x127] => 95.0 (0.000849)\n",
|
||||||
|
"[191x191] => 101.7 (0.001794)\n",
|
||||||
|
"[255x255] => 101.3 (0.003209)\n",
|
||||||
|
"[319x319] => 106.9 (0.004760)\n",
|
||||||
|
"[383x383] => 107.1 (0.006850)\n",
|
||||||
|
"[447x447] => 109.2 (0.009150)\n",
|
||||||
|
"[511x511] => 108.0 (0.012088)\n",
|
||||||
|
"[639x639] => 111.6 (0.018295)\n",
|
||||||
|
"[767x767] => 111.6 (0.026361)\n",
|
||||||
|
"[895x895] => 102.4 (0.039123)\n",
|
||||||
|
"[1023x1023] => 102.2 (0.051186)\n",
|
||||||
|
"[1151x1151] => 102.3 (0.064764)\n",
|
||||||
|
"[1279x1279] => 103.4 (0.079074)\n",
|
||||||
|
"[1407x1407] => 103.2 (0.095876)\n",
|
||||||
|
"[1535x1535] => 106.3 (0.110860)\n",
|
||||||
|
"[1663x1663] => 103.1 (0.134182)\n",
|
||||||
|
"[1791x1791] => 107.7 (0.148853)\n",
|
||||||
|
"[1919x1919] => 105.5 (0.174575)\n",
|
||||||
|
"[2047x2047] => 111.4 (0.188084)\n",
|
||||||
|
"[2303x2303] => 113.5 (0.233650)\n",
|
||||||
|
"[2559x2559] => 114.0 (0.287327)\n",
|
||||||
|
"[2815x2815] => 113.7 (0.348536)\n",
|
||||||
|
"[3071x3071] => 113.2 (0.416533)\n",
|
||||||
|
"[3327x3327] => 113.7 (0.486893)\n",
|
||||||
|
"[3583x3583] => 113.5 (0.565573)\n",
|
||||||
|
"[3839x3839] => 113.5 (0.649058)\n",
|
||||||
|
"[4095x4095] => 113.6 (0.738275)\n",
|
||||||
|
"KP07_dimsplit\n",
|
||||||
|
"[63x63] => 49.9 (0.000397)\n",
|
||||||
|
"[127x127] => 71.7 (0.001125)\n",
|
||||||
|
"[191x191] => 76.8 (0.002374)\n",
|
||||||
|
"[255x255] => 77.5 (0.004197)\n",
|
||||||
|
"[319x319] => 79.0 (0.006437)\n",
|
||||||
|
"[383x383] => 79.8 (0.009189)\n",
|
||||||
|
"[447x447] => 80.3 (0.012449)\n",
|
||||||
|
"[511x511] => 80.6 (0.016191)\n",
|
||||||
|
"[639x639] => 81.1 (0.025171)\n",
|
||||||
|
"[767x767] => 81.3 (0.036181)\n",
|
||||||
|
"[895x895] => 74.3 (0.053902)\n",
|
||||||
|
"[1023x1023] => 74.4 (0.070335)\n",
|
||||||
|
"[1151x1151] => 76.2 (0.086896)\n",
|
||||||
|
"[1279x1279] => 74.5 (0.109725)\n",
|
||||||
|
"[1407x1407] => 74.6 (0.132712)\n",
|
||||||
|
"[1535x1535] => 79.4 (0.148342)\n",
|
||||||
|
"[1663x1663] => 78.3 (0.176547)\n",
|
||||||
|
"[1791x1791] => 81.3 (0.197279)\n",
|
||||||
|
"[1919x1919] => 78.5 (0.234550)\n",
|
||||||
|
"[2047x2047] => 82.0 (0.255396)\n",
|
||||||
|
"[2303x2303] => 81.0 (0.327297)\n",
|
||||||
|
"[2559x2559] => 82.0 (0.399197)\n",
|
||||||
|
"[2815x2815] => 82.0 (0.483034)\n",
|
||||||
|
"[3071x3071] => 82.0 (0.574737)\n",
|
||||||
|
"[3327x3327] => 82.1 (0.674395)\n",
|
||||||
|
"[3583x3583] => 82.1 (0.782180)\n",
|
||||||
|
"[3839x3839] => 82.1 (0.897551)\n",
|
||||||
|
"[4095x4095] => 82.1 (1.020911)\n",
|
||||||
|
"WAF\n",
|
||||||
|
"[63x63] => 32.8 (0.000605)\n",
|
||||||
|
"[127x127] => 45.6 (0.001768)\n",
|
||||||
|
"[191x191] => 53.9 (0.003381)\n",
|
||||||
|
"[255x255] => 54.3 (0.005985)\n",
|
||||||
|
"[319x319] => 57.7 (0.008821)\n",
|
||||||
|
"[383x383] => 56.9 (0.012893)\n",
|
||||||
|
"[447x447] => 59.3 (0.016840)\n",
|
||||||
|
"[511x511] => 58.8 (0.022214)\n",
|
||||||
|
"[639x639] => 59.6 (0.034278)\n",
|
||||||
|
"[767x767] => 60.1 (0.048942)\n",
|
||||||
|
"[895x895] => 55.3 (0.072483)\n",
|
||||||
|
"[1023x1023] => 55.4 (0.094402)\n",
|
||||||
|
"[1151x1151] => 55.7 (0.119006)\n",
|
||||||
|
"[1279x1279] => 55.0 (0.148746)\n",
|
||||||
|
"[1407x1407] => 55.8 (0.177399)\n",
|
||||||
|
"[1535x1535] => 58.7 (0.200663)\n",
|
||||||
|
"[1663x1663] => 57.8 (0.239299)\n",
|
||||||
|
"[1791x1791] => 59.6 (0.269144)\n",
|
||||||
|
"[1919x1919] => 61.1 (0.301218)\n",
|
||||||
|
"[2047x2047] => 61.2 (0.342070)\n",
|
||||||
|
"[2303x2303] => 61.3 (0.432280)\n",
|
||||||
|
"[2559x2559] => 61.0 (0.537125)\n",
|
||||||
|
"[2815x2815] => 61.1 (0.648336)\n",
|
||||||
|
"[3071x3071] => 61.3 (0.769734)\n",
|
||||||
|
"[3327x3327] => 61.4 (0.901199)\n",
|
||||||
|
"[3583x3583] => 61.1 (1.049726)\n",
|
||||||
|
"[3839x3839] => 61.3 (1.202961)\n",
|
||||||
|
"[4095x4095] => 61.4 (1.366446)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"run_simulation = False\n",
|
"run_simulation = True\n",
|
||||||
"sizes = list(range(64, 512, 64)) + list(range(512, 2048, 128)) + list(range(2048, 4096, 256)) + [4096]\n",
|
"sizes = list(range(64, 512, 64)) + list(range(512, 2048, 128)) + list(range(2048, 4096, 256)) + [4096]\n",
|
||||||
|
"simulators = [LxF.LxF, FORCE.FORCE, HLL.HLL, HLL2.HLL2, KP07.KP07, KP07_dimsplit.KP07_dimsplit, WAF.WAF]\n",
|
||||||
"if (run_simulation):\n",
|
"if (run_simulation):\n",
|
||||||
" megacells = {}\n",
|
" megacells = {}\n",
|
||||||
" for simulator in simulators:\n",
|
" for simulator in simulators:\n",
|
||||||
@@ -388,7 +612,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 13,
|
"execution_count": 10,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@@ -412,7 +636,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": 11,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"scrolled": false
|
"scrolled": false
|
||||||
},
|
},
|
||||||
@@ -423,7 +647,7 @@
|
|||||||
"Text(0.5,0,'nx')"
|
"Text(0.5,0,'nx')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 14,
|
"execution_count": 11,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
},
|
},
|
||||||
@@ -450,7 +674,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@@ -459,7 +683,7 @@
|
|||||||
"Text(0.5,0,'nx')"
|
"Text(0.5,0,'nx')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 15,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
},
|
},
|
||||||
@@ -487,6 +711,23 @@
|
|||||||
"plt.xlabel(\"nx\")"
|
"plt.xlabel(\"nx\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"False\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(type(None) == None)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
|
|||||||
@@ -84,7 +84,6 @@ class CudaContext(object):
|
|||||||
self.cuda_device = cuda.Device(0)
|
self.cuda_device = cuda.Device(0)
|
||||||
self.logger.info("Using '%s' GPU", self.cuda_device.name())
|
self.logger.info("Using '%s' GPU", self.cuda_device.name())
|
||||||
self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))
|
self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))
|
||||||
self.logger.debug(" => memory: %d MB", self.cuda_device.total_memory() / (1024*1024))
|
|
||||||
|
|
||||||
# Create the CUDA context
|
# Create the CUDA context
|
||||||
if (self.blocking):
|
if (self.blocking):
|
||||||
@@ -92,6 +91,9 @@ class CudaContext(object):
|
|||||||
self.logger.warning("Using blocking context")
|
self.logger.warning("Using blocking context")
|
||||||
else:
|
else:
|
||||||
self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)
|
self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)
|
||||||
|
|
||||||
|
free, total = cuda.mem_get_info()
|
||||||
|
self.logger.debug(" => memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
|
||||||
|
|
||||||
self.logger.info("Created context handle <%s>", str(self.cuda_context.handle))
|
self.logger.info("Created context handle <%s>", str(self.cuda_context.handle))
|
||||||
|
|
||||||
@@ -294,7 +296,7 @@ class CudaArray2D:
|
|||||||
"""
|
"""
|
||||||
Uploads initial data to the CL device
|
Uploads initial data to the CL device
|
||||||
"""
|
"""
|
||||||
def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data):
|
def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32):
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
self.nx = nx
|
self.nx = nx
|
||||||
self.ny = ny
|
self.ny = ny
|
||||||
@@ -307,16 +309,18 @@ class CudaArray2D:
|
|||||||
#self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
|
#self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
|
||||||
|
|
||||||
#Make sure data is in proper format
|
#Make sure data is in proper format
|
||||||
assert np.issubdtype(cpu_data.dtype, np.float32), "Wrong datatype: %s" % str(cpu_data.dtype)
|
if cpu_data is not None:
|
||||||
assert cpu_data.itemsize == 4, "Wrong size of data type"
|
assert cpu_data.itemsize == 4, "Wrong size of data type"
|
||||||
assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"
|
assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"
|
||||||
|
|
||||||
#Upload data to the device
|
#Upload data to the device
|
||||||
if (cpu_data.shape == (ny_halo, nx_halo)):
|
if cpu_data is None:
|
||||||
|
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), dtype)
|
||||||
|
elif (cpu_data.shape == (ny_halo, nx_halo)):
|
||||||
self.data = pycuda.gpuarray.to_gpu_async(cpu_data, stream=stream)
|
self.data = pycuda.gpuarray.to_gpu_async(cpu_data, stream=stream)
|
||||||
elif (cpu_data.shape == (self.ny, self.nx)):
|
elif (cpu_data.shape == (self.ny, self.nx)):
|
||||||
#Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
#Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
||||||
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), cpu_data.dtype)
|
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), dtype)
|
||||||
#self.data.fill(0.0)
|
#self.data.fill(0.0)
|
||||||
|
|
||||||
#Create copy object from host to device
|
#Create copy object from host to device
|
||||||
@@ -337,7 +341,6 @@ class CudaArray2D:
|
|||||||
#Perform the copy
|
#Perform the copy
|
||||||
copy(stream)
|
copy(stream)
|
||||||
stream.synchronize()
|
stream.synchronize()
|
||||||
|
|
||||||
else:
|
else:
|
||||||
assert False, "Wrong data shape: %s vs %s / %s" % (str(cpu_data.shape), str((self.ny, self.nx)), str((ny_halo, nx_halo)))
|
assert False, "Wrong data shape: %s vs %s / %s" % (str(cpu_data.shape), str((self.ny, self.nx)), str((ny_halo, nx_halo)))
|
||||||
|
|
||||||
@@ -390,36 +393,31 @@ class CudaArray2D:
|
|||||||
"""
|
"""
|
||||||
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
||||||
"""
|
"""
|
||||||
class SWEDataArakawaA:
|
class ArakawaA2D:
|
||||||
"""
|
"""
|
||||||
Uploads initial data to the CL device
|
Uploads initial data to the CL device
|
||||||
"""
|
"""
|
||||||
def __init__(self, stream, nx, ny, halo_x, halo_y, h0, hu0, hv0):
|
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
self.h0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
|
self.gpu_variables = []
|
||||||
self.hu0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
|
for cpu_variable in cpu_variables:
|
||||||
self.hv0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
|
self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]
|
||||||
|
|
||||||
self.h1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
|
|
||||||
self.hu1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
|
|
||||||
self.hv1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
|
|
||||||
|
|
||||||
"""
|
|
||||||
Swaps the variables after a timestep has been completed
|
|
||||||
"""
|
|
||||||
def swap(self):
|
|
||||||
self.h1, self.h0 = self.h0, self.h1
|
|
||||||
self.hu1, self.hu0 = self.hu0, self.hu1
|
|
||||||
self.hv1, self.hv0 = self.hv0, self.hv1
|
|
||||||
|
|
||||||
|
def __getitem__(self, key):
|
||||||
|
assert type(key) == int, "Indexing is int based"
|
||||||
|
if (key > len(self.gpu_variables) or key < 0):
|
||||||
|
raise IndexError("Out of bounds")
|
||||||
|
return self.gpu_variables[key]
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Enables downloading data from CL device to Python
|
Enables downloading data from CL device to Python
|
||||||
"""
|
"""
|
||||||
def download(self, stream):
|
def download(self, stream):
|
||||||
h_cpu = self.h0.download(stream, async=True)
|
cpu_variables = []
|
||||||
hu_cpu = self.hu0.download(stream, async=True)
|
for gpu_variable in self.gpu_variables:
|
||||||
hv_cpu = self.hv0.download(stream, async=False)
|
cpu_variables += [gpu_variable.download(stream, async=True)]
|
||||||
|
stream.synchronize()
|
||||||
return h_cpu, hu_cpu, hv_cpu
|
return cpu_variables
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
from GPUSimulators import Simulator
|
from GPUSimulators import Simulator, Common
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -60,9 +60,7 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
super().__init__(context, \
|
super().__init__(context, \
|
||||||
h0, hu0, hv0, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
1, 1, \
|
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width, block_height);
|
block_width, block_height);
|
||||||
@@ -73,6 +71,16 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
BLOCK_WIDTH=self.local_size[0], \
|
BLOCK_WIDTH=self.local_size[0], \
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
BLOCK_HEIGHT=self.local_size[1])
|
||||||
|
|
||||||
|
#Create data by uploading to device
|
||||||
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
[h0, hu0, hv0])
|
||||||
|
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
[None, None, None])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "First order centered"
|
return "First order centered"
|
||||||
|
|
||||||
@@ -84,13 +92,14 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
|
def download(self):
|
||||||
|
return self.u0.download(self.stream)
|
||||||
@@ -20,7 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
from GPUSimulators import Simulator
|
from GPUSimulators import Simulator, Common
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -55,9 +55,7 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
super().__init__(context, \
|
super().__init__(context, \
|
||||||
h0, hu0, hv0, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
1, 1, \
|
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width, block_height);
|
block_width, block_height);
|
||||||
@@ -68,6 +66,16 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
BLOCK_WIDTH=self.local_size[0], \
|
BLOCK_WIDTH=self.local_size[0], \
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
BLOCK_HEIGHT=self.local_size[1])
|
||||||
|
|
||||||
|
#Create data by uploading to device
|
||||||
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
[h0, hu0, hv0])
|
||||||
|
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
[None, None, None])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Harten-Lax-van Leer"
|
return "Harten-Lax-van Leer"
|
||||||
|
|
||||||
@@ -79,13 +87,14 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
|
def download(self):
|
||||||
|
return self.u0.download(self.stream)
|
||||||
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from GPUSimulators import Simulator
|
from GPUSimulators import Simulator, Common
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -59,9 +59,7 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
super().__init__(context, \
|
super().__init__(context, \
|
||||||
h0, hu0, hv0, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
2, 2, \
|
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width, block_height);
|
block_width, block_height);
|
||||||
@@ -74,6 +72,16 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
BLOCK_WIDTH=self.local_size[0], \
|
BLOCK_WIDTH=self.local_size[0], \
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
BLOCK_HEIGHT=self.local_size[1])
|
||||||
|
|
||||||
|
#Create data by uploading to device
|
||||||
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
[h0, hu0, hv0])
|
||||||
|
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
[None, None, None])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Harten-Lax-van Leer (2nd order)"
|
return "Harten-Lax-van Leer (2nd order)"
|
||||||
|
|
||||||
@@ -90,13 +98,13 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
@@ -106,12 +114,14 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
|
def download(self):
|
||||||
|
return self.u0.download(self.stream)
|
||||||
@@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from GPUSimulators import Simulator
|
from GPUSimulators import Simulator, Common
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -60,9 +60,7 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
super().__init__(context, \
|
super().__init__(context, \
|
||||||
h0, hu0, hv0, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
2, 2, \
|
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width, block_height);
|
block_width, block_height);
|
||||||
@@ -75,6 +73,16 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
BLOCK_WIDTH=self.local_size[0], \
|
BLOCK_WIDTH=self.local_size[0], \
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
BLOCK_HEIGHT=self.local_size[1])
|
||||||
|
|
||||||
|
#Create data by uploading to device
|
||||||
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
[h0, hu0, hv0])
|
||||||
|
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
[None, None, None])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Kurganov-Petrova 2007"
|
return "Kurganov-Petrova 2007"
|
||||||
|
|
||||||
@@ -88,13 +96,13 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(substep), \
|
np.int32(substep), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
|
|
||||||
def stepEuler(self, dt):
|
def stepEuler(self, dt):
|
||||||
self.substepRK(dt, 0)
|
self.substepRK(dt, 0)
|
||||||
@@ -108,5 +116,4 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.data.download(self.stream)
|
return self.u0.download(self.stream)
|
||||||
|
|
||||||
@@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from GPUSimulators import Simulator
|
from GPUSimulators import Simulator, Common
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -60,9 +60,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
super().__init__(context, \
|
super().__init__(context, \
|
||||||
h0, hu0, hv0, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
2, 2, \
|
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width, block_height);
|
block_width, block_height);
|
||||||
@@ -75,6 +73,16 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
BLOCK_WIDTH=self.local_size[0], \
|
BLOCK_WIDTH=self.local_size[0], \
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
BLOCK_HEIGHT=self.local_size[1])
|
||||||
|
|
||||||
|
#Create data by uploading to device
|
||||||
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
[h0, hu0, hv0])
|
||||||
|
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
[None, None, None])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Kurganov-Petrova 2007 dimensionally split"
|
return "Kurganov-Petrova 2007 dimensionally split"
|
||||||
|
|
||||||
@@ -91,13 +99,13 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
@@ -107,13 +115,14 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
|||||||
self.g, \
|
self.g, \
|
||||||
self.theta, \
|
self.theta, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
|
def download(self):
|
||||||
|
return self.u0.download(self.stream)
|
||||||
@@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
from GPUSimulators import Simulator
|
from GPUSimulators import Simulator, Common
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -56,9 +56,7 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
super().__init__(context, \
|
super().__init__(context, \
|
||||||
h0, hu0, hv0, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
1, 1, \
|
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width, block_height);
|
block_width, block_height);
|
||||||
@@ -68,6 +66,16 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
"iiffffPiPiPiPiPiPi", \
|
"iiffffPiPiPiPiPiPi", \
|
||||||
BLOCK_WIDTH=self.local_size[0], \
|
BLOCK_WIDTH=self.local_size[0], \
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
BLOCK_HEIGHT=self.local_size[1])
|
||||||
|
|
||||||
|
#Create data by uploading to device
|
||||||
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
[h0, hu0, hv0])
|
||||||
|
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
1, 1, \
|
||||||
|
[None, None, None])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Lax Friedrichs"
|
return "Lax Friedrichs"
|
||||||
@@ -80,13 +88,14 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
|
def download(self):
|
||||||
|
return self.u0.download(self.stream)
|
||||||
|
|||||||
@@ -48,9 +48,7 @@ class BaseSimulator:
|
|||||||
"""
|
"""
|
||||||
def __init__(self, \
|
def __init__(self, \
|
||||||
context, \
|
context, \
|
||||||
h0, hu0, hv0, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
ghost_cells_x, ghost_cells_y, \
|
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width, block_height):
|
block_width, block_height):
|
||||||
@@ -67,14 +65,6 @@ class BaseSimulator:
|
|||||||
|
|
||||||
#Create a CUDA stream
|
#Create a CUDA stream
|
||||||
self.stream = cuda.Stream()
|
self.stream = cuda.Stream()
|
||||||
|
|
||||||
#Create data by uploading to device
|
|
||||||
free, total = cuda.mem_get_info()
|
|
||||||
self.logger.debug("GPU memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
|
|
||||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
|
||||||
nx, ny, \
|
|
||||||
ghost_cells_x, ghost_cells_y, \
|
|
||||||
h0, hu0, hv0)
|
|
||||||
|
|
||||||
#Save input parameters
|
#Save input parameters
|
||||||
#Notice that we need to specify them in the correct dataformat for the
|
#Notice that we need to specify them in the correct dataformat for the
|
||||||
@@ -94,7 +84,7 @@ class BaseSimulator:
|
|||||||
self.global_size = ( \
|
self.global_size = ( \
|
||||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||||
)
|
)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Function which simulates forward in time using the default simulation type
|
Function which simulates forward in time using the default simulation type
|
||||||
@@ -192,7 +182,7 @@ class BaseSimulator:
|
|||||||
return self.t
|
return self.t
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
return self.data.download(self.stream)
|
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||||
|
|
||||||
def synchronize(self):
|
def synchronize(self):
|
||||||
self.stream.synchronize()
|
self.stream.synchronize()
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
#Import packages we need
|
#Import packages we need
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from GPUSimulators import Simulator
|
from GPUSimulators import Simulator, Common
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -55,9 +55,7 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
super().__init__(context, \
|
super().__init__(context, \
|
||||||
h0, hu0, hv0, \
|
|
||||||
nx, ny, \
|
nx, ny, \
|
||||||
2, 2, \
|
|
||||||
dx, dy, dt, \
|
dx, dy, dt, \
|
||||||
g, \
|
g, \
|
||||||
block_width, block_height);
|
block_width, block_height);
|
||||||
@@ -68,6 +66,16 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
BLOCK_WIDTH=self.local_size[0], \
|
BLOCK_WIDTH=self.local_size[0], \
|
||||||
BLOCK_HEIGHT=self.local_size[1])
|
BLOCK_HEIGHT=self.local_size[1])
|
||||||
|
|
||||||
|
#Create data by uploading to device
|
||||||
|
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
[h0, hu0, hv0])
|
||||||
|
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||||
|
nx, ny, \
|
||||||
|
2, 2, \
|
||||||
|
[None, None, None])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return "Weighted average flux"
|
return "Weighted average flux"
|
||||||
|
|
||||||
@@ -79,30 +87,33 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
|
|
||||||
def stepDimsplitXY(self, dt):
|
def stepDimsplitXY(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
np.int32(0), \
|
np.int32(0), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
def stepDimsplitYX(self, dt):
|
def stepDimsplitYX(self, dt):
|
||||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||||
self.nx, self.ny, \
|
self.nx, self.ny, \
|
||||||
self.dx, self.dy, dt, \
|
self.dx, self.dy, dt, \
|
||||||
self.g, \
|
self.g, \
|
||||||
np.int32(1), \
|
np.int32(1), \
|
||||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||||
self.data.swap()
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
self.t += dt
|
self.t += dt
|
||||||
|
|
||||||
|
def download(self):
|
||||||
|
return self.u0.download(self.stream)
|
||||||
Reference in New Issue
Block a user