mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-07-03 15:51:01 +02:00
Refactoring CudaArray and ArakawaA grid
This commit is contained in:
parent
5668e28f99
commit
918d22b257
263
Autotuning.ipynb
263
Autotuning.ipynb
@ -228,7 +228,7 @@
|
||||
"CUDA version (9, 1, 0)\n",
|
||||
"Driver version 9010\n",
|
||||
"Using 'GeForce 840M' GPU\n",
|
||||
"Created context handle <879048629408>\n",
|
||||
"Created context handle <694827722560>\n",
|
||||
"Using CUDA cache dir c:\\Users\\anbro\\Documents\\projects\\ShallowWaterGPU\\GPUSimulators\\cuda_cache\n",
|
||||
"Autotuning enabled. It may take several minutes to run the code the first time: have patience\n"
|
||||
]
|
||||
@ -247,13 +247,13 @@
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"gen_data: 3115.227938 ms\n"
|
||||
"gen_data: 1647.211552 ms\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<matplotlib.image.AxesImage at 0xccab2d4c18>"
|
||||
"<matplotlib.image.AxesImage at 0xa1c91aa390>"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
@ -328,14 +328,238 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"LxF\n",
|
||||
"[63x63] => 107.3 (0.000185)\n",
|
||||
"[127x127] => 165.6 (0.000487)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"C:\\Users\\anbro\\AppData\\Local\\Continuum\\anaconda3\\lib\\site-packages\\ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in sqrt\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[191x191] => 183.4 (0.000995)\n",
|
||||
"[255x255] => 180.0 (0.001806)\n",
|
||||
"[319x319] => 185.8 (0.002738)\n",
|
||||
"[383x383] => 187.3 (0.003915)\n",
|
||||
"[447x447] => 189.7 (0.005266)\n",
|
||||
"[511x511] => 191.8 (0.006806)\n",
|
||||
"[639x639] => 193.6 (0.010548)\n",
|
||||
"[767x767] => 193.7 (0.015182)\n",
|
||||
"[895x895] => 195.6 (0.020481)\n",
|
||||
"[1023x1023] => 195.0 (0.026839)\n",
|
||||
"[1151x1151] => 195.8 (0.033822)\n",
|
||||
"[1279x1279] => 196.1 (0.041711)\n",
|
||||
"[1407x1407] => 196.2 (0.050439)\n",
|
||||
"[1535x1535] => 196.4 (0.059986)\n",
|
||||
"[1663x1663] => 196.6 (0.070330)\n",
|
||||
"[1791x1791] => 196.7 (0.081546)\n",
|
||||
"[1919x1919] => 196.9 (0.093511)\n",
|
||||
"[2047x2047] => 202.9 (0.103257)\n",
|
||||
"[2303x2303] => 210.7 (0.125838)\n",
|
||||
"[2559x2559] => 208.0 (0.157417)\n",
|
||||
"[2815x2815] => 211.6 (0.187229)\n",
|
||||
"[3071x3071] => 208.7 (0.225954)\n",
|
||||
"[3327x3327] => 214.2 (0.258395)\n",
|
||||
"[3583x3583] => 214.2 (0.299629)\n",
|
||||
"[3839x3839] => 214.2 (0.343982)\n",
|
||||
"[4095x4095] => 214.9 (0.390088)\n",
|
||||
"FORCE\n",
|
||||
"[63x63] => 94.3 (0.000210)\n",
|
||||
"[127x127] => 136.5 (0.000591)\n",
|
||||
"[191x191] => 147.0 (0.001241)\n",
|
||||
"[255x255] => 148.5 (0.002189)\n",
|
||||
"[319x319] => 151.6 (0.003357)\n",
|
||||
"[383x383] => 153.0 (0.004793)\n",
|
||||
"[447x447] => 153.9 (0.006494)\n",
|
||||
"[511x511] => 155.0 (0.008421)\n",
|
||||
"[639x639] => 156.4 (0.013056)\n",
|
||||
"[767x767] => 156.5 (0.018790)\n",
|
||||
"[895x895] => 157.0 (0.025514)\n",
|
||||
"[1023x1023] => 143.6 (0.036450)\n",
|
||||
"[1151x1151] => 143.6 (0.046115)\n",
|
||||
"[1279x1279] => 143.8 (0.056865)\n",
|
||||
"[1407x1407] => 143.9 (0.068797)\n",
|
||||
"[1535x1535] => 144.0 (0.081832)\n",
|
||||
"[1663x1663] => 144.0 (0.096007)\n",
|
||||
"[1791x1791] => 144.0 (0.111343)\n",
|
||||
"[1919x1919] => 144.2 (0.127712)\n",
|
||||
"[2047x2047] => 151.7 (0.138153)\n",
|
||||
"[2303x2303] => 147.3 (0.180021)\n",
|
||||
"[2559x2559] => 154.3 (0.212248)\n",
|
||||
"[2815x2815] => 158.3 (0.250279)\n",
|
||||
"[3071x3071] => 156.9 (0.300547)\n",
|
||||
"[3327x3327] => 158.4 (0.349353)\n",
|
||||
"[3583x3583] => 158.4 (0.405175)\n",
|
||||
"[3839x3839] => 158.4 (0.465201)\n",
|
||||
"[4095x4095] => 158.4 (0.529337)\n",
|
||||
"HLL\n",
|
||||
"[63x63] => 65.7 (0.000302)\n",
|
||||
"[127x127] => 98.6 (0.000818)\n",
|
||||
"[191x191] => 108.1 (0.001688)\n",
|
||||
"[255x255] => 109.2 (0.002977)\n",
|
||||
"[319x319] => 111.9 (0.004546)\n",
|
||||
"[383x383] => 113.2 (0.006482)\n",
|
||||
"[447x447] => 113.7 (0.008785)\n",
|
||||
"[511x511] => 114.4 (0.011411)\n",
|
||||
"[639x639] => 115.3 (0.017713)\n",
|
||||
"[767x767] => 115.6 (0.025454)\n",
|
||||
"[895x895] => 105.7 (0.037888)\n",
|
||||
"[1023x1023] => 105.8 (0.049473)\n",
|
||||
"[1151x1151] => 105.9 (0.062558)\n",
|
||||
"[1279x1279] => 106.0 (0.077148)\n",
|
||||
"[1407x1407] => 106.1 (0.093290)\n",
|
||||
"[1535x1535] => 109.8 (0.107271)\n",
|
||||
"[1663x1663] => 106.2 (0.130195)\n",
|
||||
"[1791x1791] => 107.7 (0.148973)\n",
|
||||
"[1919x1919] => 115.0 (0.160104)\n",
|
||||
"[2047x2047] => 113.3 (0.184913)\n",
|
||||
"[2303x2303] => 111.9 (0.236908)\n",
|
||||
"[2559x2559] => 116.6 (0.280840)\n",
|
||||
"[2815x2815] => 116.6 (0.339777)\n",
|
||||
"[3071x3071] => 116.6 (0.404268)\n",
|
||||
"[3327x3327] => 116.6 (0.474572)\n",
|
||||
"[3583x3583] => 116.7 (0.550240)\n",
|
||||
"[3839x3839] => 116.7 (0.631563)\n",
|
||||
"[4095x4095] => 116.7 (0.718161)\n",
|
||||
"HLL2\n",
|
||||
"[63x63] => 44.2 (0.000449)\n",
|
||||
"[127x127] => 63.0 (0.001280)\n",
|
||||
"[191x191] => 68.4 (0.002666)\n",
|
||||
"[255x255] => 69.2 (0.004698)\n",
|
||||
"[319x319] => 70.6 (0.007204)\n",
|
||||
"[383x383] => 71.1 (0.010314)\n",
|
||||
"[447x447] => 71.6 (0.013956)\n",
|
||||
"[511x511] => 72.0 (0.018146)\n",
|
||||
"[639x639] => 72.4 (0.028204)\n",
|
||||
"[767x767] => 72.5 (0.040545)\n",
|
||||
"[895x895] => 72.8 (0.055047)\n",
|
||||
"[1023x1023] => 72.8 (0.071828)\n",
|
||||
"[1151x1151] => 66.5 (0.099652)\n",
|
||||
"[1279x1279] => 69.8 (0.117195)\n",
|
||||
"[1407x1407] => 67.0 (0.147833)\n",
|
||||
"[1535x1535] => 71.3 (0.165185)\n",
|
||||
"[1663x1663] => 71.2 (0.194123)\n",
|
||||
"[1791x1791] => 72.1 (0.222351)\n",
|
||||
"[1919x1919] => 70.3 (0.261847)\n",
|
||||
"[2047x2047] => 73.2 (0.286228)\n",
|
||||
"[2303x2303] => 72.0 (0.368479)\n",
|
||||
"[2559x2559] => 73.2 (0.447096)\n",
|
||||
"[2815x2815] => 73.2 (0.541084)\n",
|
||||
"[3071x3071] => 73.2 (0.643925)\n",
|
||||
"[3327x3327] => 73.2 (0.755588)\n",
|
||||
"[3583x3583] => 73.3 (0.876222)\n",
|
||||
"[3839x3839] => 73.3 (1.005958)\n",
|
||||
"[4095x4095] => 73.3 (1.144158)\n",
|
||||
"KP07\n",
|
||||
"[63x63] => 69.9 (0.000284)\n",
|
||||
"[127x127] => 95.0 (0.000849)\n",
|
||||
"[191x191] => 101.7 (0.001794)\n",
|
||||
"[255x255] => 101.3 (0.003209)\n",
|
||||
"[319x319] => 106.9 (0.004760)\n",
|
||||
"[383x383] => 107.1 (0.006850)\n",
|
||||
"[447x447] => 109.2 (0.009150)\n",
|
||||
"[511x511] => 108.0 (0.012088)\n",
|
||||
"[639x639] => 111.6 (0.018295)\n",
|
||||
"[767x767] => 111.6 (0.026361)\n",
|
||||
"[895x895] => 102.4 (0.039123)\n",
|
||||
"[1023x1023] => 102.2 (0.051186)\n",
|
||||
"[1151x1151] => 102.3 (0.064764)\n",
|
||||
"[1279x1279] => 103.4 (0.079074)\n",
|
||||
"[1407x1407] => 103.2 (0.095876)\n",
|
||||
"[1535x1535] => 106.3 (0.110860)\n",
|
||||
"[1663x1663] => 103.1 (0.134182)\n",
|
||||
"[1791x1791] => 107.7 (0.148853)\n",
|
||||
"[1919x1919] => 105.5 (0.174575)\n",
|
||||
"[2047x2047] => 111.4 (0.188084)\n",
|
||||
"[2303x2303] => 113.5 (0.233650)\n",
|
||||
"[2559x2559] => 114.0 (0.287327)\n",
|
||||
"[2815x2815] => 113.7 (0.348536)\n",
|
||||
"[3071x3071] => 113.2 (0.416533)\n",
|
||||
"[3327x3327] => 113.7 (0.486893)\n",
|
||||
"[3583x3583] => 113.5 (0.565573)\n",
|
||||
"[3839x3839] => 113.5 (0.649058)\n",
|
||||
"[4095x4095] => 113.6 (0.738275)\n",
|
||||
"KP07_dimsplit\n",
|
||||
"[63x63] => 49.9 (0.000397)\n",
|
||||
"[127x127] => 71.7 (0.001125)\n",
|
||||
"[191x191] => 76.8 (0.002374)\n",
|
||||
"[255x255] => 77.5 (0.004197)\n",
|
||||
"[319x319] => 79.0 (0.006437)\n",
|
||||
"[383x383] => 79.8 (0.009189)\n",
|
||||
"[447x447] => 80.3 (0.012449)\n",
|
||||
"[511x511] => 80.6 (0.016191)\n",
|
||||
"[639x639] => 81.1 (0.025171)\n",
|
||||
"[767x767] => 81.3 (0.036181)\n",
|
||||
"[895x895] => 74.3 (0.053902)\n",
|
||||
"[1023x1023] => 74.4 (0.070335)\n",
|
||||
"[1151x1151] => 76.2 (0.086896)\n",
|
||||
"[1279x1279] => 74.5 (0.109725)\n",
|
||||
"[1407x1407] => 74.6 (0.132712)\n",
|
||||
"[1535x1535] => 79.4 (0.148342)\n",
|
||||
"[1663x1663] => 78.3 (0.176547)\n",
|
||||
"[1791x1791] => 81.3 (0.197279)\n",
|
||||
"[1919x1919] => 78.5 (0.234550)\n",
|
||||
"[2047x2047] => 82.0 (0.255396)\n",
|
||||
"[2303x2303] => 81.0 (0.327297)\n",
|
||||
"[2559x2559] => 82.0 (0.399197)\n",
|
||||
"[2815x2815] => 82.0 (0.483034)\n",
|
||||
"[3071x3071] => 82.0 (0.574737)\n",
|
||||
"[3327x3327] => 82.1 (0.674395)\n",
|
||||
"[3583x3583] => 82.1 (0.782180)\n",
|
||||
"[3839x3839] => 82.1 (0.897551)\n",
|
||||
"[4095x4095] => 82.1 (1.020911)\n",
|
||||
"WAF\n",
|
||||
"[63x63] => 32.8 (0.000605)\n",
|
||||
"[127x127] => 45.6 (0.001768)\n",
|
||||
"[191x191] => 53.9 (0.003381)\n",
|
||||
"[255x255] => 54.3 (0.005985)\n",
|
||||
"[319x319] => 57.7 (0.008821)\n",
|
||||
"[383x383] => 56.9 (0.012893)\n",
|
||||
"[447x447] => 59.3 (0.016840)\n",
|
||||
"[511x511] => 58.8 (0.022214)\n",
|
||||
"[639x639] => 59.6 (0.034278)\n",
|
||||
"[767x767] => 60.1 (0.048942)\n",
|
||||
"[895x895] => 55.3 (0.072483)\n",
|
||||
"[1023x1023] => 55.4 (0.094402)\n",
|
||||
"[1151x1151] => 55.7 (0.119006)\n",
|
||||
"[1279x1279] => 55.0 (0.148746)\n",
|
||||
"[1407x1407] => 55.8 (0.177399)\n",
|
||||
"[1535x1535] => 58.7 (0.200663)\n",
|
||||
"[1663x1663] => 57.8 (0.239299)\n",
|
||||
"[1791x1791] => 59.6 (0.269144)\n",
|
||||
"[1919x1919] => 61.1 (0.301218)\n",
|
||||
"[2047x2047] => 61.2 (0.342070)\n",
|
||||
"[2303x2303] => 61.3 (0.432280)\n",
|
||||
"[2559x2559] => 61.0 (0.537125)\n",
|
||||
"[2815x2815] => 61.1 (0.648336)\n",
|
||||
"[3071x3071] => 61.3 (0.769734)\n",
|
||||
"[3327x3327] => 61.4 (0.901199)\n",
|
||||
"[3583x3583] => 61.1 (1.049726)\n",
|
||||
"[3839x3839] => 61.3 (1.202961)\n",
|
||||
"[4095x4095] => 61.4 (1.366446)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"run_simulation = False\n",
|
||||
"run_simulation = True\n",
|
||||
"sizes = list(range(64, 512, 64)) + list(range(512, 2048, 128)) + list(range(2048, 4096, 256)) + [4096]\n",
|
||||
"simulators = [LxF.LxF, FORCE.FORCE, HLL.HLL, HLL2.HLL2, KP07.KP07, KP07_dimsplit.KP07_dimsplit, WAF.WAF]\n",
|
||||
"if (run_simulation):\n",
|
||||
" megacells = {}\n",
|
||||
" for simulator in simulators:\n",
|
||||
@ -388,7 +612,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -412,7 +636,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 11,
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
@ -423,7 +647,7 @@
|
||||
"Text(0.5,0,'nx')"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
@ -450,7 +674,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -459,7 +683,7 @@
|
||||
"Text(0.5,0,'nx')"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
},
|
||||
@ -487,6 +711,23 @@
|
||||
"plt.xlabel(\"nx\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"False\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(type(None) == None)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
|
@ -84,7 +84,6 @@ class CudaContext(object):
|
||||
self.cuda_device = cuda.Device(0)
|
||||
self.logger.info("Using '%s' GPU", self.cuda_device.name())
|
||||
self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))
|
||||
self.logger.debug(" => memory: %d MB", self.cuda_device.total_memory() / (1024*1024))
|
||||
|
||||
# Create the CUDA context
|
||||
if (self.blocking):
|
||||
@ -92,6 +91,9 @@ class CudaContext(object):
|
||||
self.logger.warning("Using blocking context")
|
||||
else:
|
||||
self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)
|
||||
|
||||
free, total = cuda.mem_get_info()
|
||||
self.logger.debug(" => memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
|
||||
|
||||
self.logger.info("Created context handle <%s>", str(self.cuda_context.handle))
|
||||
|
||||
@ -294,7 +296,7 @@ class CudaArray2D:
|
||||
"""
|
||||
Uploads initial data to the CL device
|
||||
"""
|
||||
def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data):
|
||||
def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32):
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.nx = nx
|
||||
self.ny = ny
|
||||
@ -307,16 +309,18 @@ class CudaArray2D:
|
||||
#self.logger.debug("Allocating [%dx%d] buffer", self.nx, self.ny)
|
||||
|
||||
#Make sure data is in proper format
|
||||
assert np.issubdtype(cpu_data.dtype, np.float32), "Wrong datatype: %s" % str(cpu_data.dtype)
|
||||
assert cpu_data.itemsize == 4, "Wrong size of data type"
|
||||
assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"
|
||||
if cpu_data is not None:
|
||||
assert cpu_data.itemsize == 4, "Wrong size of data type"
|
||||
assert not np.isfortran(cpu_data), "Wrong datatype (Fortran, expected C)"
|
||||
|
||||
#Upload data to the device
|
||||
if (cpu_data.shape == (ny_halo, nx_halo)):
|
||||
if cpu_data is None:
|
||||
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), dtype)
|
||||
elif (cpu_data.shape == (ny_halo, nx_halo)):
|
||||
self.data = pycuda.gpuarray.to_gpu_async(cpu_data, stream=stream)
|
||||
elif (cpu_data.shape == (self.ny, self.nx)):
|
||||
#Should perhaps use pycuda.driver.mem_alloc_data.pitch() here
|
||||
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), cpu_data.dtype)
|
||||
self.data = pycuda.gpuarray.empty((ny_halo, nx_halo), dtype)
|
||||
#self.data.fill(0.0)
|
||||
|
||||
#Create copy object from host to device
|
||||
@ -337,7 +341,6 @@ class CudaArray2D:
|
||||
#Perform the copy
|
||||
copy(stream)
|
||||
stream.synchronize()
|
||||
|
||||
else:
|
||||
assert False, "Wrong data shape: %s vs %s / %s" % (str(cpu_data.shape), str((self.ny, self.nx)), str((ny_halo, nx_halo)))
|
||||
|
||||
@ -390,36 +393,31 @@ class CudaArray2D:
|
||||
"""
|
||||
A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
|
||||
"""
|
||||
class SWEDataArakawaA:
|
||||
class ArakawaA2D:
|
||||
"""
|
||||
Uploads initial data to the CL device
|
||||
"""
|
||||
def __init__(self, stream, nx, ny, halo_x, halo_y, h0, hu0, hv0):
|
||||
def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
|
||||
self.logger = logging.getLogger(__name__)
|
||||
self.h0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
|
||||
self.hu0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
|
||||
self.hv0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
|
||||
|
||||
self.h1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
|
||||
self.hu1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
|
||||
self.hv1 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
|
||||
|
||||
"""
|
||||
Swaps the variables after a timestep has been completed
|
||||
"""
|
||||
def swap(self):
|
||||
self.h1, self.h0 = self.h0, self.h1
|
||||
self.hu1, self.hu0 = self.hu0, self.hu1
|
||||
self.hv1, self.hv0 = self.hv0, self.hv1
|
||||
self.gpu_variables = []
|
||||
for cpu_variable in cpu_variables:
|
||||
self.gpu_variables += [CudaArray2D(stream, nx, ny, halo_x, halo_y, cpu_variable)]
|
||||
|
||||
def __getitem__(self, key):
|
||||
assert type(key) == int, "Indexing is int based"
|
||||
if (key > len(self.gpu_variables) or key < 0):
|
||||
raise IndexError("Out of bounds")
|
||||
return self.gpu_variables[key]
|
||||
|
||||
"""
|
||||
Enables downloading data from CL device to Python
|
||||
"""
|
||||
def download(self, stream):
|
||||
h_cpu = self.h0.download(stream, async=True)
|
||||
hu_cpu = self.hu0.download(stream, async=True)
|
||||
hv_cpu = self.hv0.download(stream, async=False)
|
||||
|
||||
return h_cpu, hu_cpu, hv_cpu
|
||||
cpu_variables = []
|
||||
for gpu_variable in self.gpu_variables:
|
||||
cpu_variables += [gpu_variable.download(stream, async=True)]
|
||||
stream.synchronize()
|
||||
return cpu_variables
|
||||
|
||||
|
||||
|
@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@ -60,9 +60,7 @@ class FORCE (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@ -73,6 +71,16 @@ class FORCE (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "First order centered"
|
||||
|
||||
@ -84,13 +92,14 @@ class FORCE (Simulator.BaseSimulator):
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
@ -20,7 +20,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@ -55,9 +55,7 @@ class HLL (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@ -68,6 +66,16 @@ class HLL (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Harten-Lax-van Leer"
|
||||
|
||||
@ -79,13 +87,14 @@ class HLL (Simulator.BaseSimulator):
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@ -59,9 +59,7 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@ -74,6 +72,16 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Harten-Lax-van Leer (2nd order)"
|
||||
|
||||
@ -90,13 +98,13 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(0), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
def stepDimsplitYX(self, dt):
|
||||
@ -106,12 +114,14 @@ class HLL2 (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(1), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@ -60,9 +60,7 @@ class KP07 (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@ -75,6 +73,16 @@ class KP07 (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Kurganov-Petrova 2007"
|
||||
|
||||
@ -88,13 +96,13 @@ class KP07 (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(substep), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
|
||||
def stepEuler(self, dt):
|
||||
self.substepRK(dt, 0)
|
||||
@ -108,5 +116,4 @@ class KP07 (Simulator.BaseSimulator):
|
||||
self.t += dt
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
|
||||
return self.u0.download(self.stream)
|
@ -26,7 +26,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@ -60,9 +60,7 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@ -75,6 +73,16 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Kurganov-Petrova 2007 dimensionally split"
|
||||
|
||||
@ -91,13 +99,13 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(0), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
def stepDimsplitYX(self, dt):
|
||||
@ -107,13 +115,14 @@ class KP07_dimsplit (Simulator.BaseSimulator):
|
||||
self.g, \
|
||||
self.theta, \
|
||||
np.int32(1), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
@ -21,7 +21,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
"""
|
||||
|
||||
#Import packages we need
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@ -56,9 +56,7 @@ class LxF (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@ -68,6 +66,16 @@ class LxF (Simulator.BaseSimulator):
|
||||
"iiffffPiPiPiPiPiPi", \
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
1, 1, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Lax Friedrichs"
|
||||
@ -80,13 +88,14 @@ class LxF (Simulator.BaseSimulator):
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
||||
|
@ -48,9 +48,7 @@ class BaseSimulator:
|
||||
"""
|
||||
def __init__(self, \
|
||||
context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height):
|
||||
@ -67,14 +65,6 @@ class BaseSimulator:
|
||||
|
||||
#Create a CUDA stream
|
||||
self.stream = cuda.Stream()
|
||||
|
||||
#Create data by uploading to device
|
||||
free, total = cuda.mem_get_info()
|
||||
self.logger.debug("GPU memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
|
||||
self.data = Common.SWEDataArakawaA(self.stream, \
|
||||
nx, ny, \
|
||||
ghost_cells_x, ghost_cells_y, \
|
||||
h0, hu0, hv0)
|
||||
|
||||
#Save input parameters
|
||||
#Notice that we need to specify them in the correct dataformat for the
|
||||
@ -94,7 +84,7 @@ class BaseSimulator:
|
||||
self.global_size = ( \
|
||||
int(np.ceil(self.nx / float(self.local_size[0]))), \
|
||||
int(np.ceil(self.ny / float(self.local_size[1]))) \
|
||||
)
|
||||
)
|
||||
|
||||
"""
|
||||
Function which simulates forward in time using the default simulation type
|
||||
@ -192,7 +182,7 @@ class BaseSimulator:
|
||||
return self.t
|
||||
|
||||
def download(self):
|
||||
return self.data.download(self.stream)
|
||||
raise(NotImplementedError("Needs to be implemented in subclass"))
|
||||
|
||||
def synchronize(self):
|
||||
self.stream.synchronize()
|
||||
|
@ -22,7 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#Import packages we need
|
||||
import numpy as np
|
||||
from GPUSimulators import Simulator
|
||||
from GPUSimulators import Simulator, Common
|
||||
|
||||
|
||||
|
||||
@ -55,9 +55,7 @@ class WAF (Simulator.BaseSimulator):
|
||||
|
||||
# Call super constructor
|
||||
super().__init__(context, \
|
||||
h0, hu0, hv0, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
dx, dy, dt, \
|
||||
g, \
|
||||
block_width, block_height);
|
||||
@ -68,6 +66,16 @@ class WAF (Simulator.BaseSimulator):
|
||||
BLOCK_WIDTH=self.local_size[0], \
|
||||
BLOCK_HEIGHT=self.local_size[1])
|
||||
|
||||
#Create data by uploading to device
|
||||
self.u0 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[h0, hu0, hv0])
|
||||
self.u1 = Common.ArakawaA2D(self.stream, \
|
||||
nx, ny, \
|
||||
2, 2, \
|
||||
[None, None, None])
|
||||
|
||||
def __str__(self):
|
||||
return "Weighted average flux"
|
||||
|
||||
@ -79,30 +87,33 @@ class WAF (Simulator.BaseSimulator):
|
||||
|
||||
def stepDimsplitXY(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(0), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(0), \
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
def stepDimsplitYX(self, dt):
|
||||
self.kernel.prepared_async_call(self.global_size, self.local_size, self.stream, \
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(1), \
|
||||
self.data.h0.data.gpudata, self.data.h0.data.strides[0], \
|
||||
self.data.hu0.data.gpudata, self.data.hu0.data.strides[0], \
|
||||
self.data.hv0.data.gpudata, self.data.hv0.data.strides[0], \
|
||||
self.data.h1.data.gpudata, self.data.h1.data.strides[0], \
|
||||
self.data.hu1.data.gpudata, self.data.hu1.data.strides[0], \
|
||||
self.data.hv1.data.gpudata, self.data.hv1.data.strides[0])
|
||||
self.data.swap()
|
||||
self.nx, self.ny, \
|
||||
self.dx, self.dy, dt, \
|
||||
self.g, \
|
||||
np.int32(1), \
|
||||
self.u0[0].data.gpudata, self.u0[0].data.strides[0], \
|
||||
self.u0[1].data.gpudata, self.u0[1].data.strides[0], \
|
||||
self.u0[2].data.gpudata, self.u0[2].data.strides[0], \
|
||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0], \
|
||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0], \
|
||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0])
|
||||
self.u0, self.u1 = self.u1, self.u0
|
||||
self.t += dt
|
||||
|
||||
def download(self):
|
||||
return self.u0.download(self.stream)
|
Loading…
x
Reference in New Issue
Block a user