mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
feat(GPUSimulator): add the extra variables introduced in SWWECommon for all other algorithms
This commit is contained in:
parent
71cc5628c8
commit
aa54d21f0a
@ -41,7 +41,9 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
g,
|
g,
|
||||||
cfl_scale=0.9,
|
cfl_scale=0.9,
|
||||||
boundary_conditions=BoundaryCondition(),
|
boundary_conditions=BoundaryCondition(),
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16,
|
||||||
|
dt: float=None,
|
||||||
|
compile_opts: list[str]=[]):
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
|
|
||||||
@ -55,6 +57,7 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
dt: Size of each timestep (90 s)
|
dt: Size of each timestep (90 s)
|
||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
|
compile_opts: Pass a list of nvcc compiler options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
@ -75,11 +78,11 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
},
|
},
|
||||||
compile_args={
|
compile_args={
|
||||||
'no_extern_c': True,
|
'no_extern_c': True,
|
||||||
'options': ["--use_fast_math"],
|
'options': ["--use_fast_math"] + compile_opts,
|
||||||
},
|
},
|
||||||
jit_compile_args={})
|
jit_compile_args={})
|
||||||
self.kernel = module.get_function("FORCEKernel")
|
self.kernel = module.get_function("FORCEKernel")
|
||||||
self.kernel.prepare("iiffffiPiPiPiPiPiPiP")
|
self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream,
|
self.u0 = Common.ArakawaA2D(self.stream,
|
||||||
@ -91,10 +94,15 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
1, 1,
|
1, 1,
|
||||||
[None, None, None])
|
[None, None, None])
|
||||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
|
||||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
if dt == None:
|
||||||
dt = min(dt_x, dt_y)
|
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||||
self.cfl_data.fill(dt, stream=self.stream)
|
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||||
|
self.dt = min(dt_x, dt_y)
|
||||||
|
else:
|
||||||
|
self.dt = dt
|
||||||
|
|
||||||
|
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||||
|
|
||||||
def substep(self, dt, step_number):
|
def substep(self, dt, step_number):
|
||||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||||
@ -108,7 +116,10 @@ class FORCE (Simulator.BaseSimulator):
|
|||||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
self.cfl_data.gpudata)
|
self.cfl_data.gpudata,
|
||||||
|
0, 0,
|
||||||
|
self.nx, self.ny)
|
||||||
|
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
|
|
||||||
def getOutput(self):
|
def getOutput(self):
|
||||||
|
@ -40,7 +40,9 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
g,
|
g,
|
||||||
cfl_scale=0.9,
|
cfl_scale=0.9,
|
||||||
boundary_conditions=BoundaryCondition(),
|
boundary_conditions=BoundaryCondition(),
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16,
|
||||||
|
dt: float=None,
|
||||||
|
compile_opts: list[str]=[]):
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
|
|
||||||
@ -54,6 +56,7 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
dt: Size of each timestep (90 s)
|
dt: Size of each timestep (90 s)
|
||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
|
compile_opts: Pass a list of nvcc compiler options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
@ -74,11 +77,11 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
},
|
},
|
||||||
compile_args={
|
compile_args={
|
||||||
'no_extern_c': True,
|
'no_extern_c': True,
|
||||||
'options': ["--use_fast_math"],
|
'options': ["--use_fast_math"] + compile_opts,
|
||||||
},
|
},
|
||||||
jit_compile_args={})
|
jit_compile_args={})
|
||||||
self.kernel = module.get_function("HLLKernel")
|
self.kernel = module.get_function("HLLKernel")
|
||||||
self.kernel.prepare("iiffffiPiPiPiPiPiPiP")
|
self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream,
|
self.u0 = Common.ArakawaA2D(self.stream,
|
||||||
@ -90,10 +93,14 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
1, 1,
|
1, 1,
|
||||||
[None, None, None])
|
[None, None, None])
|
||||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
if dt == None:
|
||||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||||
dt = min(dt_x, dt_y)
|
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||||
self.cfl_data.fill(dt, stream=self.stream)
|
self.dt = min(dt_x, dt_y)
|
||||||
|
else:
|
||||||
|
self.dt = dt
|
||||||
|
|
||||||
|
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||||
|
|
||||||
def substep(self, dt, step_number):
|
def substep(self, dt, step_number):
|
||||||
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream,
|
||||||
@ -107,7 +114,9 @@ class HLL (Simulator.BaseSimulator):
|
|||||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
self.cfl_data.gpudata)
|
self.cfl_data.gpudata,
|
||||||
|
0, 0,
|
||||||
|
self.nx, self.ny)
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
|
|
||||||
def getOutput(self):
|
def getOutput(self):
|
||||||
|
@ -41,7 +41,9 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
theta=1.8,
|
theta=1.8,
|
||||||
cfl_scale=0.9,
|
cfl_scale=0.9,
|
||||||
boundary_conditions=BoundaryCondition(),
|
boundary_conditions=BoundaryCondition(),
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16,
|
||||||
|
dt: float=None,
|
||||||
|
compile_opts: list[str]=[]):
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
|
|
||||||
@ -55,6 +57,7 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
dt: Size of each timestep (90 s)
|
dt: Size of each timestep (90 s)
|
||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
|
compile_opts: Pass a list of nvcc compiler options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
@ -76,11 +79,11 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
},
|
},
|
||||||
compile_args={
|
compile_args={
|
||||||
'no_extern_c': True,
|
'no_extern_c': True,
|
||||||
'options': ["--use_fast_math"],
|
'options': ["--use_fast_math"] + compile_opts,
|
||||||
},
|
},
|
||||||
jit_compile_args={})
|
jit_compile_args={})
|
||||||
self.kernel = module.get_function("HLL2Kernel")
|
self.kernel = module.get_function("HLL2Kernel")
|
||||||
self.kernel.prepare("iifffffiiPiPiPiPiPiPiP")
|
self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream,
|
self.u0 = Common.ArakawaA2D(self.stream,
|
||||||
@ -92,10 +95,15 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
2, 2,
|
2, 2,
|
||||||
[None, None, None])
|
[None, None, None])
|
||||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
|
||||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
if dt == None:
|
||||||
dt = min(dt_x, dt_y)
|
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||||
self.cfl_data.fill(dt, stream=self.stream)
|
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||||
|
self.dt = min(dt_x, dt_y)
|
||||||
|
else:
|
||||||
|
self.dt = dt
|
||||||
|
|
||||||
|
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||||
|
|
||||||
def substep(self, dt, step_number):
|
def substep(self, dt, step_number):
|
||||||
self.substepDimsplit(dt*0.5, step_number)
|
self.substepDimsplit(dt*0.5, step_number)
|
||||||
@ -114,7 +122,9 @@ class HLL2 (Simulator.BaseSimulator):
|
|||||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
self.cfl_data.gpudata)
|
self.cfl_data.gpudata,
|
||||||
|
0, 0,
|
||||||
|
self.nx, self.ny)
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
|
|
||||||
def getOutput(self):
|
def getOutput(self):
|
||||||
|
@ -47,7 +47,9 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
cfl_scale=0.9,
|
cfl_scale=0.9,
|
||||||
order=2,
|
order=2,
|
||||||
boundary_conditions=BoundaryCondition(),
|
boundary_conditions=BoundaryCondition(),
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16,
|
||||||
|
dt: float=None,
|
||||||
|
compile_opts: list[str]=[]):
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
|
|
||||||
@ -61,6 +63,7 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
dt: Size of each timestep (90 s)
|
dt: Size of each timestep (90 s)
|
||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
|
compile_opts: Pass a list of nvcc compiler options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
@ -83,11 +86,11 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
},
|
},
|
||||||
compile_args={
|
compile_args={
|
||||||
'no_extern_c': True,
|
'no_extern_c': True,
|
||||||
'options': ["--use_fast_math"],
|
'options': ["--use_fast_math"] + compile_opts,
|
||||||
},
|
},
|
||||||
jit_compile_args={})
|
jit_compile_args={})
|
||||||
self.kernel = module.get_function("KP07Kernel")
|
self.kernel = module.get_function("KP07Kernel")
|
||||||
self.kernel.prepare("iifffffiiPiPiPiPiPiPiP")
|
self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream,
|
self.u0 = Common.ArakawaA2D(self.stream,
|
||||||
@ -99,10 +102,15 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
2, 2,
|
2, 2,
|
||||||
[None, None, None])
|
[None, None, None])
|
||||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
|
||||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
if dt == None:
|
||||||
dt = min(dt_x, dt_y)
|
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||||
self.cfl_data.fill(dt, stream=self.stream)
|
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||||
|
self.dt = min(dt_x, dt_y)
|
||||||
|
else:
|
||||||
|
self.dt = dt
|
||||||
|
|
||||||
|
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||||
|
|
||||||
|
|
||||||
def substep(self, dt, step_number):
|
def substep(self, dt, step_number):
|
||||||
@ -123,7 +131,9 @@ class KP07 (Simulator.BaseSimulator):
|
|||||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
self.cfl_data.gpudata)
|
self.cfl_data.gpudata,
|
||||||
|
0, 0,
|
||||||
|
self.nx, self.ny)
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
|
|
||||||
def getOutput(self):
|
def getOutput(self):
|
||||||
|
@ -46,7 +46,9 @@ class KP07_dimsplit(Simulator.BaseSimulator):
|
|||||||
theta=1.3,
|
theta=1.3,
|
||||||
cfl_scale=0.9,
|
cfl_scale=0.9,
|
||||||
boundary_conditions=BoundaryCondition(),
|
boundary_conditions=BoundaryCondition(),
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16,
|
||||||
|
dt: float=None,
|
||||||
|
compile_opts: list[str]=[]):
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
|
|
||||||
@ -60,6 +62,7 @@ class KP07_dimsplit(Simulator.BaseSimulator):
|
|||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
dt: Size of each timestep (90 s)
|
dt: Size of each timestep (90 s)
|
||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
|
compile_opts: Pass a list of nvcc compiler options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
@ -83,11 +86,11 @@ class KP07_dimsplit(Simulator.BaseSimulator):
|
|||||||
},
|
},
|
||||||
compile_args={
|
compile_args={
|
||||||
'no_extern_c': True,
|
'no_extern_c': True,
|
||||||
'options': ["--use_fast_math"],
|
'options': ["--use_fast_math"] + compile_opts,
|
||||||
},
|
},
|
||||||
jit_compile_args={})
|
jit_compile_args={})
|
||||||
self.kernel = module.get_function("KP07DimsplitKernel")
|
self.kernel = module.get_function("KP07DimsplitKernel")
|
||||||
self.kernel.prepare("iifffffiiPiPiPiPiPiPiP")
|
self.kernel.prepare("iifffffiiPiPiPiPiPiPiPiiii")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream,
|
self.u0 = Common.ArakawaA2D(self.stream,
|
||||||
@ -99,10 +102,15 @@ class KP07_dimsplit(Simulator.BaseSimulator):
|
|||||||
self.gc_x, self.gc_y,
|
self.gc_x, self.gc_y,
|
||||||
[None, None, None])
|
[None, None, None])
|
||||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
|
||||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
if dt == None:
|
||||||
dt = min(dt_x, dt_y)
|
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||||
self.cfl_data.fill(dt, stream=self.stream)
|
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||||
|
self.dt = min(dt_x, dt_y)
|
||||||
|
else:
|
||||||
|
self.dt = dt
|
||||||
|
|
||||||
|
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||||
|
|
||||||
def substep(self, dt, step_number):
|
def substep(self, dt, step_number):
|
||||||
self.substepDimsplit(dt*0.5, step_number)
|
self.substepDimsplit(dt*0.5, step_number)
|
||||||
@ -121,7 +129,9 @@ class KP07_dimsplit(Simulator.BaseSimulator):
|
|||||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
self.cfl_data.gpudata)
|
self.cfl_data.gpudata,
|
||||||
|
0, 0,
|
||||||
|
self.nx, self.ny)
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
|
|
||||||
def getOutput(self):
|
def getOutput(self):
|
||||||
|
@ -41,7 +41,9 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
g: float,
|
g: float,
|
||||||
cfl_scale: float=0.9,
|
cfl_scale: float=0.9,
|
||||||
boundary_conditions=BoundaryCondition(),
|
boundary_conditions=BoundaryCondition(),
|
||||||
block_width: int=16, block_height: int=16):
|
block_width: int=16, block_height: int=16,
|
||||||
|
dt: float=None,
|
||||||
|
compile_opts: list[str]=[]):
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
|
|
||||||
@ -55,6 +57,7 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
dt: Size of each timestep (90 s)
|
dt: Size of each timestep (90 s)
|
||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
|
compile_opts: Pass a list of nvcc compiler options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
@ -75,11 +78,11 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
},
|
},
|
||||||
compile_args={
|
compile_args={
|
||||||
'no_extern_c': True,
|
'no_extern_c': True,
|
||||||
'options': ["--use_fast_math"],
|
'options': ["--use_fast_math"] + compile_opts,
|
||||||
},
|
},
|
||||||
jit_compile_args={})
|
jit_compile_args={})
|
||||||
self.kernel = module.get_function("LxFKernel")
|
self.kernel = module.get_function("LxFKernel")
|
||||||
self.kernel.prepare("iiffffiPiPiPiPiPiPiP")
|
self.kernel.prepare("iiffffiPiPiPiPiPiPiPiiii")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream,
|
self.u0 = Common.ArakawaA2D(self.stream,
|
||||||
@ -91,10 +94,15 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
1, 1,
|
1, 1,
|
||||||
[None, None, None])
|
[None, None, None])
|
||||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
|
||||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
if dt == None:
|
||||||
dt = min(dt_x, dt_y)
|
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||||
self.cfl_data.fill(dt, stream=self.stream)
|
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||||
|
self.dt = min(dt_x, dt_y)
|
||||||
|
else:
|
||||||
|
self.dt = dt
|
||||||
|
|
||||||
|
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||||
|
|
||||||
def substep(self, dt, step_number):
|
def substep(self, dt, step_number):
|
||||||
"""
|
"""
|
||||||
@ -113,7 +121,9 @@ class LxF (Simulator.BaseSimulator):
|
|||||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
self.cfl_data.gpudata)
|
self.cfl_data.gpudata,
|
||||||
|
0, 0,
|
||||||
|
self.nx, self.ny)
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
|
|
||||||
def getOutput(self):
|
def getOutput(self):
|
||||||
|
@ -41,7 +41,9 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
g,
|
g,
|
||||||
cfl_scale=0.9,
|
cfl_scale=0.9,
|
||||||
boundary_conditions=BoundaryCondition(),
|
boundary_conditions=BoundaryCondition(),
|
||||||
block_width=16, block_height=16):
|
block_width=16, block_height=16,
|
||||||
|
dt: float=None,
|
||||||
|
compile_opts: list[str]=[]):
|
||||||
"""
|
"""
|
||||||
Initialization routine
|
Initialization routine
|
||||||
|
|
||||||
@ -55,6 +57,7 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
dy: Grid cell spacing along y-axis (20 000 m)
|
dy: Grid cell spacing along y-axis (20 000 m)
|
||||||
dt: Size of each timestep (90 s)
|
dt: Size of each timestep (90 s)
|
||||||
g: Gravitational accelleration (9.81 m/s^2)
|
g: Gravitational accelleration (9.81 m/s^2)
|
||||||
|
compile_opts: Pass a list of nvcc compiler options
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Call super constructor
|
# Call super constructor
|
||||||
@ -75,11 +78,11 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
},
|
},
|
||||||
compile_args={
|
compile_args={
|
||||||
'no_extern_c': True,
|
'no_extern_c': True,
|
||||||
'options': ["--use_fast_math"],
|
'options': ["--use_fast_math"] + compile_opts,
|
||||||
},
|
},
|
||||||
jit_compile_args={})
|
jit_compile_args={})
|
||||||
self.kernel = module.get_function("WAFKernel")
|
self.kernel = module.get_function("WAFKernel")
|
||||||
self.kernel.prepare("iiffffiiPiPiPiPiPiPiP")
|
self.kernel.prepare("iiffffiiPiPiPiPiPiPiPiiii")
|
||||||
|
|
||||||
#Create data by uploading to device
|
#Create data by uploading to device
|
||||||
self.u0 = Common.ArakawaA2D(self.stream,
|
self.u0 = Common.ArakawaA2D(self.stream,
|
||||||
@ -91,10 +94,15 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
2, 2,
|
2, 2,
|
||||||
[None, None, None])
|
[None, None, None])
|
||||||
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
self.cfl_data = gpuarray.GPUArray(self.grid_size, dtype=np.float32)
|
||||||
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
|
||||||
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
if dt == None:
|
||||||
dt = min(dt_x, dt_y)
|
dt_x = np.min(self.dx / (np.abs(hu0/h0) + np.sqrt(g*h0)))
|
||||||
self.cfl_data.fill(dt, stream=self.stream)
|
dt_y = np.min(self.dy / (np.abs(hv0/h0) + np.sqrt(g*h0)))
|
||||||
|
self.dt = min(dt_x, dt_y)
|
||||||
|
else:
|
||||||
|
self.dt = dt
|
||||||
|
|
||||||
|
self.cfl_data.fill(self.dt, stream=self.stream)
|
||||||
|
|
||||||
def substep(self, dt, step_number):
|
def substep(self, dt, step_number):
|
||||||
self.substepDimsplit(dt*0.5, step_number)
|
self.substepDimsplit(dt*0.5, step_number)
|
||||||
@ -112,7 +120,9 @@ class WAF (Simulator.BaseSimulator):
|
|||||||
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
self.u1[0].data.gpudata, self.u1[0].data.strides[0],
|
||||||
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
self.u1[1].data.gpudata, self.u1[1].data.strides[0],
|
||||||
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
self.u1[2].data.gpudata, self.u1[2].data.strides[0],
|
||||||
self.cfl_data.gpudata)
|
self.cfl_data.gpudata,
|
||||||
|
0, 0,
|
||||||
|
self.nx, self.ny)
|
||||||
self.u0, self.u1 = self.u1, self.u0
|
self.u0, self.u1 = self.u1, self.u0
|
||||||
|
|
||||||
def getOutput(self):
|
def getOutput(self):
|
||||||
|
@ -100,7 +100,17 @@ __global__ void FORCEKernel(
|
|||||||
float* hv1_ptr_, int hv1_pitch_,
|
float* hv1_ptr_, int hv1_pitch_,
|
||||||
|
|
||||||
//Output CFL
|
//Output CFL
|
||||||
float* cfl_) {
|
float* cfl_,
|
||||||
|
|
||||||
|
//Subarea of internal domain to compute
|
||||||
|
int x0=0, int y0=0,
|
||||||
|
int x1=0, int y1=0) {
|
||||||
|
|
||||||
|
if(x1 == 0)
|
||||||
|
x1 = nx_;
|
||||||
|
|
||||||
|
if(y1 == 0)
|
||||||
|
y1 = ny_;
|
||||||
|
|
||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
@ -112,9 +122,9 @@ __global__ void FORCEKernel(
|
|||||||
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Compute flux along x, and evolve
|
//Compute flux along x, and evolve
|
||||||
@ -130,9 +140,9 @@ __global__ void FORCEKernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Write to main memory
|
//Write to main memory
|
||||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
|
|
||||||
//Compute the CFL for this block
|
//Compute the CFL for this block
|
||||||
if (cfl_ != NULL) {
|
if (cfl_ != NULL) {
|
||||||
|
@ -116,7 +116,17 @@ __global__ void HLLKernel(
|
|||||||
float* hv1_ptr_, int hv1_pitch_,
|
float* hv1_ptr_, int hv1_pitch_,
|
||||||
|
|
||||||
//Output CFL
|
//Output CFL
|
||||||
float* cfl_) {
|
float* cfl_,
|
||||||
|
|
||||||
|
//Subarea of internal domain to compute
|
||||||
|
int x0=0, int y0=0,
|
||||||
|
int x1=0, int y1=0) {
|
||||||
|
|
||||||
|
if(x1 == 0)
|
||||||
|
x1 = nx_;
|
||||||
|
|
||||||
|
if(y1 == 0)
|
||||||
|
y1 = ny_;
|
||||||
|
|
||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
@ -129,9 +139,9 @@ __global__ void HLLKernel(
|
|||||||
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
|
|
||||||
//Compute F flux
|
//Compute F flux
|
||||||
computeFluxF(Q, F, g_);
|
computeFluxF(Q, F, g_);
|
||||||
@ -148,9 +158,9 @@ __global__ void HLLKernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
// Write to main memory for all internal cells
|
// Write to main memory for all internal cells
|
||||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
|
|
||||||
//Compute the CFL for this block
|
//Compute the CFL for this block
|
||||||
if (cfl_ != NULL) {
|
if (cfl_ != NULL) {
|
||||||
|
@ -144,7 +144,17 @@ __global__ void HLL2Kernel(
|
|||||||
float* hv1_ptr_, int hv1_pitch_,
|
float* hv1_ptr_, int hv1_pitch_,
|
||||||
|
|
||||||
//Output CFL
|
//Output CFL
|
||||||
float* cfl_) {
|
float* cfl_,
|
||||||
|
|
||||||
|
//Subarea of internal domain to compute
|
||||||
|
int x0=0, int y0=0,
|
||||||
|
int x1=0, int y1=0) {
|
||||||
|
|
||||||
|
if(x1 == 0)
|
||||||
|
x1 = nx_;
|
||||||
|
|
||||||
|
if(y1 == 0)
|
||||||
|
y1 = ny_;
|
||||||
|
|
||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
@ -158,9 +168,9 @@ __global__ void HLL2Kernel(
|
|||||||
__shared__ float F[3][h+4][w+4];
|
__shared__ float F[3][h+4][w+4];
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
|
|
||||||
//Step 0 => evolve x first, then y
|
//Step 0 => evolve x first, then y
|
||||||
if (step_ == 0) {
|
if (step_ == 0) {
|
||||||
@ -203,9 +213,9 @@ __global__ void HLL2Kernel(
|
|||||||
|
|
||||||
|
|
||||||
// Write to main memory for all internal cells
|
// Write to main memory for all internal cells
|
||||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
|
|
||||||
//Compute the CFL for this block
|
//Compute the CFL for this block
|
||||||
if (cfl_ != NULL) {
|
if (cfl_ != NULL) {
|
||||||
|
@ -154,7 +154,18 @@ __global__ void KP07Kernel(
|
|||||||
float* hv1_ptr_, int hv1_pitch_,
|
float* hv1_ptr_, int hv1_pitch_,
|
||||||
|
|
||||||
//Output CFL
|
//Output CFL
|
||||||
float* cfl_) {
|
float* cfl_,
|
||||||
|
|
||||||
|
//Subarea of internal domain to compute
|
||||||
|
int x0=0, int y0=0,
|
||||||
|
int x1=0, int y1=0) {
|
||||||
|
|
||||||
|
if(x1 == 0)
|
||||||
|
x1 = nx_;
|
||||||
|
|
||||||
|
if(y1 == 0)
|
||||||
|
y1 = ny_;
|
||||||
|
|
||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
const unsigned int gc_x = 2;
|
const unsigned int gc_x = 2;
|
||||||
@ -179,9 +190,9 @@ __global__ void KP07Kernel(
|
|||||||
|
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
|
|
||||||
|
|
||||||
//Reconstruct slopes along x and y axis
|
//Reconstruct slopes along x and y axis
|
||||||
|
@ -141,7 +141,18 @@ __global__ void KP07DimsplitKernel(
|
|||||||
float* hv1_ptr_, int hv1_pitch_,
|
float* hv1_ptr_, int hv1_pitch_,
|
||||||
|
|
||||||
//Output CFL
|
//Output CFL
|
||||||
float* cfl_) {
|
float* cfl_,
|
||||||
|
|
||||||
|
//Subarea of internal domain to compute
|
||||||
|
int x0=0, int y0=0,
|
||||||
|
int x1=0, int y1=0) {
|
||||||
|
|
||||||
|
if(x1 == 0)
|
||||||
|
x1 = nx_;
|
||||||
|
|
||||||
|
if(y1 == 0)
|
||||||
|
y1 = ny_;
|
||||||
|
|
||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
const unsigned int gc_x = 2;
|
const unsigned int gc_x = 2;
|
||||||
@ -154,9 +165,9 @@ __global__ void KP07DimsplitKernel(
|
|||||||
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
__shared__ float F[vars][h+2*gc_y][w+2*gc_x];
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
|
|
||||||
if (step_ == 0) {
|
if (step_ == 0) {
|
||||||
//Along X
|
//Along X
|
||||||
@ -194,9 +205,9 @@ __global__ void KP07DimsplitKernel(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Write to main memory for all internal cells
|
// Write to main memory for all internal cells
|
||||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
|
|
||||||
//Compute the CFL for this block
|
//Compute the CFL for this block
|
||||||
if (cfl_ != NULL) {
|
if (cfl_ != NULL) {
|
||||||
|
@ -116,8 +116,18 @@ void LxFKernel(
|
|||||||
float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
float* hv1_ptr_, int hv1_pitch_,
|
float* hv1_ptr_, int hv1_pitch_,
|
||||||
|
|
||||||
//Output CFL
|
//Output CFL
|
||||||
float* cfl_) {
|
float* cfl_,
|
||||||
|
|
||||||
|
//Subarea of internal domain to compute
|
||||||
|
int x0=0, int y0=0,
|
||||||
|
int x1=0, int y1=0) {
|
||||||
|
|
||||||
|
if(x1 == 0)
|
||||||
|
x1 = nx_;
|
||||||
|
|
||||||
|
if(y1 == 0)
|
||||||
|
y1 = ny_;
|
||||||
|
|
||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
@ -130,9 +140,9 @@ void LxFKernel(
|
|||||||
__shared__ float G[vars][h+1][w ];
|
__shared__ float G[vars][h+1][w ];
|
||||||
|
|
||||||
//Read from global memory
|
//Read from global memory
|
||||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
|
|
||||||
//Compute fluxes along the x and y axis
|
//Compute fluxes along the x and y axis
|
||||||
computeFluxF<w, h>(Q, F, g_, dx_, dt_);
|
computeFluxF<w, h>(Q, F, g_, dx_, dt_);
|
||||||
@ -154,9 +164,9 @@ void LxFKernel(
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
//Write to main memory
|
//Write to main memory
|
||||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
|
|
||||||
//Compute the CFL for this block
|
//Compute the CFL for this block
|
||||||
if (cfl_ != NULL) {
|
if (cfl_ != NULL) {
|
||||||
|
@ -116,7 +116,17 @@ __global__ void WAFKernel(
|
|||||||
//Output h^{n+1}
|
//Output h^{n+1}
|
||||||
float* h1_ptr_, int h1_pitch_,
|
float* h1_ptr_, int h1_pitch_,
|
||||||
float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_,
|
||||||
|
|
||||||
|
//Subarea of internal domain to compute
|
||||||
|
int x0=0, int y0=0,
|
||||||
|
int x1=0, int y1=0) {
|
||||||
|
|
||||||
|
if(x1 == 0)
|
||||||
|
x1 = nx_;
|
||||||
|
|
||||||
|
if(y1 == 0)
|
||||||
|
y1 = ny_;
|
||||||
|
|
||||||
const unsigned int w = BLOCK_WIDTH;
|
const unsigned int w = BLOCK_WIDTH;
|
||||||
const unsigned int h = BLOCK_HEIGHT;
|
const unsigned int h = BLOCK_HEIGHT;
|
||||||
@ -131,9 +141,9 @@ __global__ void WAFKernel(
|
|||||||
|
|
||||||
|
|
||||||
//Read into shared memory Q from global memory
|
//Read into shared memory Q from global memory
|
||||||
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, 1>( h0_ptr_, h0_pitch_, Q[0], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, -1, 1>(hu0_ptr_, hu0_pitch_, Q[1], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_);
|
readBlock<w, h, gc_x, gc_y, 1, -1>(hv0_ptr_, hv0_pitch_, Q[2], nx_, ny_, boundary_conditions_, x0, y0, x1, y1);
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
|
|
||||||
@ -170,9 +180,9 @@ __global__ void WAFKernel(
|
|||||||
|
|
||||||
|
|
||||||
// Write to main memory for all internal cells
|
// Write to main memory for all internal cells
|
||||||
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>( h1_ptr_, h1_pitch_, Q[0], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hu1_ptr_, hu1_pitch_, Q[1], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1);
|
writeBlock<w, h, gc_x, gc_y>(hv1_ptr_, hv1_pitch_, Q[2], nx_, ny_, 0, 1, x0, y0, x1, y1);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // extern "C"
|
} // extern "C"
|
@ -477,10 +477,11 @@ __device__ float3 FORCE_1D_flux(const float3 Q_l, const float3 Q_r, const float
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// TODO give better names for `Q_w` and `Q_h` in the template
|
||||||
template<int w, int h, int gc_x, int gc_y, int vars>
|
// as the current names probably do not reflect what they represent
|
||||||
__device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x],
|
template<int Q_w, int Q_h, int gc_x, int gc_y, int vars>
|
||||||
float shmem[h+2*gc_y][w+2*gc_x],
|
__device__ void writeCfl(float Q[vars][Q_h+2*gc_y][Q_w+2*gc_x],
|
||||||
|
float shmem[Q_h+2*gc_y][Q_w+2*gc_x],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dx_, const float dy_, const float g_,
|
const float dx_, const float dy_, const float g_,
|
||||||
float* output_) {
|
float* output_) {
|
||||||
@ -509,7 +510,7 @@ __device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x],
|
|||||||
if (ti < nx_+gc_x && tj < ny_+gc_y) {
|
if (ti < nx_+gc_x && tj < ny_+gc_y) {
|
||||||
if (ty == gc_y) {
|
if (ty == gc_y) {
|
||||||
float min_val = shmem[ty][tx];
|
float min_val = shmem[ty][tx];
|
||||||
const int max_y = min(h, ny_+gc_y - tj);
|
const int max_y = min(Q_h, ny_+gc_y - tj);
|
||||||
for (int j=gc_y; j<max_y+gc_y; j++) {
|
for (int j=gc_y; j<max_y+gc_y; j++) {
|
||||||
min_val = fminf(min_val, shmem[j][tx]);
|
min_val = fminf(min_val, shmem[j][tx]);
|
||||||
}
|
}
|
||||||
@ -521,7 +522,7 @@ __device__ void writeCfl(float Q[vars][h+2*gc_y][w+2*gc_x],
|
|||||||
//One thread loops over first row to find global min
|
//One thread loops over first row to find global min
|
||||||
if (tx == gc_x && ty == gc_y) {
|
if (tx == gc_x && ty == gc_y) {
|
||||||
float min_val = shmem[ty][tx];
|
float min_val = shmem[ty][tx];
|
||||||
const int max_x = min(w, nx_+gc_x - ti);
|
const int max_x = min(Q_w, nx_+gc_x - ti);
|
||||||
for (int i=gc_x; i<max_x+gc_x; ++i) {
|
for (int i=gc_x; i<max_x+gc_x; ++i) {
|
||||||
min_val = fminf(min_val, shmem[ty][i]);
|
min_val = fminf(min_val, shmem[ty][i]);
|
||||||
}
|
}
|
||||||
|
@ -322,8 +322,8 @@ inline __device__ void readBlock(float* ptr_, int pitch_,
|
|||||||
float Q[h+2*gc_y][w+2*gc_x],
|
float Q[h+2*gc_y][w+2*gc_x],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const int boundary_conditions_,
|
const int boundary_conditions_,
|
||||||
int x0, int y0,
|
int x0, int y0,
|
||||||
int x1, int y1) {
|
int x1, int y1) {
|
||||||
//Index of block within domain
|
//Index of block within domain
|
||||||
const int bx = blockDim.x * blockIdx.x;
|
const int bx = blockDim.x * blockIdx.x;
|
||||||
const int by = blockDim.y * blockIdx.y;
|
const int by = blockDim.y * blockIdx.y;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user