mirror of
https://github.com/smyalygames/FiniteVolumeGPU.git
synced 2025-05-18 14:34:13 +02:00
Renamed macro and added iPython magic
This commit is contained in:
parent
426b8dba5c
commit
3e401e3fe1
@ -37,6 +37,7 @@ class Timer(object):
|
|||||||
Class which keeps track of the CUDA context and some helper functions
|
Class which keeps track of the CUDA context and some helper functions
|
||||||
"""
|
"""
|
||||||
class CudaContext(object):
|
class CudaContext(object):
|
||||||
|
|
||||||
def __init__(self, verbose=True, blocking=False):
|
def __init__(self, verbose=True, blocking=False):
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.blocking = blocking
|
self.blocking = blocking
|
||||||
@ -95,6 +96,9 @@ class CudaContext(object):
|
|||||||
self.cuda_context.detach()
|
self.cuda_context.detach()
|
||||||
|
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return "CudaContext id " + str(self.cuda_context.handle)
|
||||||
|
|
||||||
def hash_kernel(kernel_filename, include_dirs, verbose=False):
|
def hash_kernel(kernel_filename, include_dirs, verbose=False):
|
||||||
# Generate a kernel ID for our cache
|
# Generate a kernel ID for our cache
|
||||||
|
|
||||||
@ -172,8 +176,8 @@ class CudaContext(object):
|
|||||||
print("`-> Kernel changed or not in hash => compiling " + kernel_filename)
|
print("`-> Kernel changed or not in hash => compiling " + kernel_filename)
|
||||||
|
|
||||||
#Create define string
|
#Create define string
|
||||||
define_string = "#define block_width " + str(block_width) + "\n"
|
define_string = "#define BLOCK_WIDTH " + str(block_width) + "\n"
|
||||||
define_string += "#define block_height " + str(block_height) + "\n\n"
|
define_string += "#define BLOCK_HEIGHT " + str(block_height) + "\n\n"
|
||||||
|
|
||||||
kernel_string = define_string + '#include "' + os.path.join(module_path, kernel_filename) + '"'
|
kernel_string = define_string + '#include "' + os.path.join(module_path, kernel_filename) + '"'
|
||||||
|
|
||||||
|
@ -27,8 +27,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxF(float Q[3][block_height+2][block_width+2],
|
void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float F[3][block_height+1][block_width+1],
|
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
|
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -39,7 +39,7 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
|
|||||||
{
|
{
|
||||||
int j=ty;
|
int j=ty;
|
||||||
const int l = j + 1; //Skip ghost cells
|
const int l = j + 1; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+1; i+=BLOCK_WIDTH) {
|
||||||
const int k = i;
|
const int k = i;
|
||||||
|
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
@ -64,15 +64,15 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
|
|||||||
* Computes the flux along the y axis for all faces
|
* Computes the flux along the y axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxG(float Q[3][block_height+2][block_width+2],
|
void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float G[3][block_height+1][block_width+1],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Compute fluxes along the y axis
|
//Compute fluxes along the y axis
|
||||||
for (int j=ty; j<block_height+1; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+1; j+=BLOCK_HEIGHT) {
|
||||||
const int l = j;
|
const int l = j;
|
||||||
{
|
{
|
||||||
int i=tx;
|
int i=tx;
|
||||||
@ -113,8 +113,8 @@ __global__ void FORCEKernel(
|
|||||||
float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
|
|
||||||
__shared__ float Q[3][block_height+2][block_width+2];
|
__shared__ float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
|
||||||
__shared__ float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
|
||||||
|
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
|
@ -32,9 +32,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxF(float Q[3][block_height+4][block_width+4],
|
void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float Qx[3][block_height+2][block_width+2],
|
float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float F[3][block_height+1][block_width+1],
|
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -43,7 +43,7 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
|
|||||||
{
|
{
|
||||||
const int j=ty;
|
const int j=ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+1; i+=BLOCK_WIDTH) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
// Reconstruct point values of Q at the left and right hand side
|
// Reconstruct point values of Q at the left and right hand side
|
||||||
// of the cell for both the left (i) and right (i+1) cell
|
// of the cell for both the left (i) and right (i+1) cell
|
||||||
@ -84,15 +84,15 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
|
|||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxG(float Q[3][block_height+4][block_width+4],
|
void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float Qy[3][block_height+2][block_width+2],
|
float Qy[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float G[3][block_height+1][block_width+1],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+1; j+=BLOCK_HEIGHT) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
{
|
{
|
||||||
int i=tx;
|
int i=tx;
|
||||||
@ -157,9 +157,9 @@ __global__ void HLL2Kernel(
|
|||||||
float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__shared__ float Q[3][block_height+4][block_width+4];
|
__shared__ float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4];
|
||||||
__shared__ float Qx[3][block_height+2][block_width+2];
|
__shared__ float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
|
||||||
__shared__ float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,8 +30,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxF(float Q[3][block_height+2][block_width+2],
|
void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float F[3][block_height+1][block_width+1],
|
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_) {
|
const float g_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -40,7 +40,7 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
|
|||||||
{
|
{
|
||||||
const int j=ty;
|
const int j=ty;
|
||||||
const int l = j + 1; //Skip ghost cells
|
const int l = j + 1; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+1; i+=BLOCK_WIDTH) {
|
||||||
const int k = i;
|
const int k = i;
|
||||||
|
|
||||||
const float3 Q_l = make_float3(Q[0][l][k ], Q[1][l][k ], Q[2][l][k ]);
|
const float3 Q_l = make_float3(Q[0][l][k ], Q[1][l][k ], Q[2][l][k ]);
|
||||||
@ -64,14 +64,14 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
|
|||||||
* Computes the flux along the y axis for all faces
|
* Computes the flux along the y axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxG(float Q[3][block_height+2][block_width+2],
|
void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float G[3][block_height+1][block_width+1],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_) {
|
const float g_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+1; j+=BLOCK_HEIGHT) {
|
||||||
const int l = j;
|
const int l = j;
|
||||||
{
|
{
|
||||||
const int i=tx;
|
const int i=tx;
|
||||||
@ -120,8 +120,8 @@ __global__ void HLLKernel(
|
|||||||
float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__shared__ float Q[3][block_height+2][block_width+2];
|
__shared__ float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
|
||||||
__shared__ float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
|
||||||
|
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
|
75
SWESimulators/IPythonMagic.py
Normal file
75
SWESimulators/IPythonMagic.py
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
This python module implements helpers for IPython / Jupyter and CUDA
|
||||||
|
|
||||||
|
Copyright (C) 2018 SINTEF ICT
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from IPython.core.magic import line_magic, Magics, magics_class
|
||||||
|
import pycuda.driver as cuda
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@magics_class
|
||||||
|
class CudaContextHandler(Magics):
|
||||||
|
@line_magic
|
||||||
|
def cuda_context_handler(self, context_name):
|
||||||
|
print("Registering " + context_name + " as a global context")
|
||||||
|
|
||||||
|
if context_name in self.shell.user_ns.keys():
|
||||||
|
print("`-> Context already registered! Ignoring")
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
print("`-> Creating context")
|
||||||
|
self.shell.ex(context_name + " = Common.CudaContext(verbose=True, blocking=False)")
|
||||||
|
|
||||||
|
# this function will be called on exceptions in any cell
|
||||||
|
def custom_exc(shell, etype, evalue, tb, tb_offset=None):
|
||||||
|
print("Exception caught: Resetting to CUDA context " + context_name)
|
||||||
|
while (cuda.Context.get_current() != None):
|
||||||
|
context = cuda.Context.get_current()
|
||||||
|
print("`-> popping " + str(context.handle))
|
||||||
|
cuda.Context.pop()
|
||||||
|
|
||||||
|
if context_name in self.shell.user_ns.keys():
|
||||||
|
print("`-> pushing " + str(self.shell.user_ns[context_name].cuda_context.handle))
|
||||||
|
self.shell.ex(context_name + ".cuda_context.push()")
|
||||||
|
else:
|
||||||
|
print("No CUDA context called " + context_name + " found (something is wrong)!")
|
||||||
|
print("CUDA will not work now")
|
||||||
|
|
||||||
|
# still show the error within the notebook, don't just swallow it
|
||||||
|
shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset)
|
||||||
|
|
||||||
|
# this registers a custom exception handler for the whole current notebook
|
||||||
|
get_ipython().set_custom_exc((Exception,), custom_exc)
|
||||||
|
|
||||||
|
|
||||||
|
# Handle CUDA context when exiting python
|
||||||
|
import atexit
|
||||||
|
def exitfunc():
|
||||||
|
print("Exitfunc: Resetting CUDA context stack")
|
||||||
|
while (cuda.Context.get_current() != None):
|
||||||
|
context = cuda.Context.get_current()
|
||||||
|
print("`-> popping " + str(context.handle))
|
||||||
|
cuda.Context.pop()
|
||||||
|
atexit.register(exitfunc)
|
||||||
|
|
||||||
|
print("Registering automatic CUDA context handling")
|
||||||
|
print("(use %cuda_context_handler my_context to create a context called my_context")
|
||||||
|
ip = get_ipython()
|
||||||
|
ip.register_magics(CudaContextHandler)
|
@ -30,9 +30,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
|
|
||||||
__device__
|
__device__
|
||||||
void computeFluxF(float Q[3][block_height+4][block_width+4],
|
void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float Qx[3][block_height+2][block_width+2],
|
float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float F[3][block_height+1][block_width+1],
|
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -41,7 +41,7 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
|
|||||||
{
|
{
|
||||||
int j=ty;
|
int j=ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+1; i+=BLOCK_WIDTH) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
// Reconstruct point values of Q at the left and right hand side
|
// Reconstruct point values of Q at the left and right hand side
|
||||||
// of the cell for both the left (i) and right (i+1) cell
|
// of the cell for both the left (i) and right (i+1) cell
|
||||||
@ -75,15 +75,15 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
|
|||||||
}
|
}
|
||||||
|
|
||||||
__device__
|
__device__
|
||||||
void computeFluxG(float Q[3][block_height+4][block_width+4],
|
void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float Qy[3][block_height+2][block_width+2],
|
float Qy[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float G[3][block_height+1][block_width+1],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+1; j+=BLOCK_HEIGHT) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
{
|
{
|
||||||
int i=tx;
|
int i=tx;
|
||||||
@ -148,9 +148,9 @@ __global__ void KP07DimsplitKernel(
|
|||||||
|
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__shared__ float Q[3][block_height+4][block_width+4];
|
__shared__ float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4];
|
||||||
__shared__ float Qx[3][block_height+2][block_width+2];
|
__shared__ float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
|
||||||
__shared__ float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,9 +30,9 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
|
|
||||||
|
|
||||||
__device__
|
__device__
|
||||||
void computeFluxF(float Q[3][block_height+4][block_width+4],
|
void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float Qx[3][block_height+2][block_width+2],
|
float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float F[3][block_height+1][block_width+1],
|
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_) {
|
const float g_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -41,7 +41,7 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
|
|||||||
{
|
{
|
||||||
int j=ty;
|
int j=ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+1; i+=BLOCK_WIDTH) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
const float3 Qp = make_float3(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
|
const float3 Qp = make_float3(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
|
||||||
@ -61,15 +61,15 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
|
|||||||
}
|
}
|
||||||
|
|
||||||
__device__
|
__device__
|
||||||
void computeFluxG(float Q[3][block_height+4][block_width+4],
|
void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float Qy[3][block_height+2][block_width+2],
|
float Qy[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float G[3][block_height+1][block_width+1],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_) {
|
const float g_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+1; j+=BLOCK_HEIGHT) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
{
|
{
|
||||||
int i=tx;
|
int i=tx;
|
||||||
@ -129,14 +129,14 @@ __global__ void KP07Kernel(
|
|||||||
const int tj = get_global_id(1) + 2;
|
const int tj = get_global_id(1) + 2;
|
||||||
|
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__shared__ float Q[3][block_height+4][block_width+4];
|
__shared__ float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4];
|
||||||
|
|
||||||
//The following slightly wastes memory, but enables us to reuse the
|
//The following slightly wastes memory, but enables us to reuse the
|
||||||
//funcitons in common.opencl
|
//funcitons in common.opencl
|
||||||
__shared__ float Qx[3][block_height+2][block_width+2];
|
__shared__ float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
|
||||||
__shared__ float Qy[3][block_height+2][block_width+2];
|
__shared__ float Qy[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
|
||||||
__shared__ float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
|
||||||
__shared__ float G[3][block_height+1][block_width+1];
|
__shared__ float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,8 +27,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxF(float Q[3][block_height+2][block_width+2],
|
void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float F[3][block_height][block_width+1],
|
float F[3][BLOCK_HEIGHT][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -37,7 +37,7 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
|
|||||||
{
|
{
|
||||||
const int j=ty;
|
const int j=ty;
|
||||||
const int l = j + 1; //Skip ghost cells
|
const int l = j + 1; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+1; i+=BLOCK_WIDTH) {
|
||||||
const int k = i;
|
const int k = i;
|
||||||
|
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
@ -62,14 +62,14 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
|
|||||||
* Computes the flux along the y axis for all faces
|
* Computes the flux along the y axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxG(float Q[3][block_height+2][block_width+2],
|
void computeFluxG(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float G[3][block_height+1][block_width],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+1; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+1; j+=BLOCK_HEIGHT) {
|
||||||
const int l = j;
|
const int l = j;
|
||||||
{
|
{
|
||||||
const int i=tx;
|
const int i=tx;
|
||||||
@ -114,9 +114,9 @@ __global__ void LxFKernel(
|
|||||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
||||||
const int tj = get_global_id(1) + 1;
|
const int tj = get_global_id(1) + 1;
|
||||||
|
|
||||||
__shared__ float Q[3][block_height+2][block_width+2];
|
__shared__ float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2];
|
||||||
__shared__ float F[3][block_height][block_width+1];
|
__shared__ float F[3][BLOCK_HEIGHT][BLOCK_WIDTH+1];
|
||||||
__shared__ float G[3][block_height+1][block_width];
|
__shared__ float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH];
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
readBlock1(h0_ptr_, h0_pitch_,
|
readBlock1(h0_ptr_, h0_pitch_,
|
||||||
|
@ -33,8 +33,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
* Computes the flux along the x axis for all faces
|
* Computes the flux along the x axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxF(float Q[3][block_height+4][block_width+4],
|
void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float F[3][block_height+1][block_width+1],
|
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dx_, const float dt_) {
|
const float g_, const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -43,7 +43,7 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
|
|||||||
{
|
{
|
||||||
int j=ty;
|
int j=ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+1; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+1; i+=BLOCK_WIDTH) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
|
|
||||||
// Q at interface from the right and left
|
// Q at interface from the right and left
|
||||||
@ -72,15 +72,15 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
|
|||||||
* Computes the flux along the y axis for all faces
|
* Computes the flux along the y axis for all faces
|
||||||
*/
|
*/
|
||||||
__device__
|
__device__
|
||||||
void computeFluxG(float Q[3][block_height+4][block_width+4],
|
void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float G[3][block_height+1][block_width+1],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const float g_, const float dy_, const float dt_) {
|
const float g_, const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Compute fluxes along the y axis
|
//Compute fluxes along the y axis
|
||||||
for (int j=ty; j<block_height+1; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+1; j+=BLOCK_HEIGHT) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
{
|
{
|
||||||
int i=tx;
|
int i=tx;
|
||||||
@ -130,8 +130,8 @@ __global__ void WAFKernel(
|
|||||||
float* hu1_ptr_, int hu1_pitch_,
|
float* hu1_ptr_, int hu1_pitch_,
|
||||||
float* hv1_ptr_, int hv1_pitch_) {
|
float* hv1_ptr_, int hv1_pitch_) {
|
||||||
//Shared memory variables
|
//Shared memory variables
|
||||||
__shared__ float Q[3][block_height+4][block_width+4];
|
__shared__ float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4];
|
||||||
__shared__ float F[3][block_height+1][block_width+1];
|
__shared__ float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1];
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -94,18 +94,18 @@ inline __device__ __host__ float clamp(const float f, const float a, const float
|
|||||||
__device__ void readBlock1(float* h_ptr_, int h_pitch_,
|
__device__ void readBlock1(float* h_ptr_, int h_pitch_,
|
||||||
float* hu_ptr_, int hu_pitch_,
|
float* hu_ptr_, int hu_pitch_,
|
||||||
float* hv_ptr_, int hv_pitch_,
|
float* hv_ptr_, int hv_pitch_,
|
||||||
float Q[3][block_height+2][block_width+2],
|
float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
const int nx_, const int ny_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Index of block within domain
|
//Index of block within domain
|
||||||
const int bx = block_width * get_group_id(0);
|
const int bx = BLOCK_WIDTH * get_group_id(0);
|
||||||
const int by = block_height * get_group_id(1);
|
const int by = BLOCK_HEIGHT * get_group_id(1);
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
for (int j=ty; j<block_height+2; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+2; j+=BLOCK_HEIGHT) {
|
||||||
const int l = clamp(by + j, 0, ny_+1); // Out of bounds
|
const int l = clamp(by + j, 0, ny_+1); // Out of bounds
|
||||||
|
|
||||||
//Compute the pointer to current row in the arrays
|
//Compute the pointer to current row in the arrays
|
||||||
@ -113,7 +113,7 @@ __device__ void readBlock1(float* h_ptr_, int h_pitch_,
|
|||||||
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l);
|
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l);
|
||||||
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l);
|
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l);
|
||||||
|
|
||||||
for (int i=tx; i<block_width+2; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+2; i+=BLOCK_WIDTH) {
|
||||||
const int k = clamp(bx + i, 0, nx_+1); // Out of bounds
|
const int k = clamp(bx + i, 0, nx_+1); // Out of bounds
|
||||||
|
|
||||||
Q[0][j][i] = h_row[k];
|
Q[0][j][i] = h_row[k];
|
||||||
@ -133,18 +133,18 @@ __device__ void readBlock1(float* h_ptr_, int h_pitch_,
|
|||||||
__device__ void readBlock2(float* h_ptr_, int h_pitch_,
|
__device__ void readBlock2(float* h_ptr_, int h_pitch_,
|
||||||
float* hu_ptr_, int hu_pitch_,
|
float* hu_ptr_, int hu_pitch_,
|
||||||
float* hv_ptr_, int hv_pitch_,
|
float* hv_ptr_, int hv_pitch_,
|
||||||
float Q[3][block_height+4][block_width+4],
|
float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
const int nx_, const int ny_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
//Index of block within domain
|
//Index of block within domain
|
||||||
const int bx = block_width * get_group_id(0);
|
const int bx = BLOCK_WIDTH * get_group_id(0);
|
||||||
const int by = block_height * get_group_id(1);
|
const int by = BLOCK_HEIGHT * get_group_id(1);
|
||||||
|
|
||||||
//Read into shared memory
|
//Read into shared memory
|
||||||
for (int j=ty; j<block_height+4; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+4; j+=BLOCK_HEIGHT) {
|
||||||
const int l = clamp(by + j, 0, ny_+3); // Out of bounds
|
const int l = clamp(by + j, 0, ny_+3); // Out of bounds
|
||||||
|
|
||||||
//Compute the pointer to current row in the arrays
|
//Compute the pointer to current row in the arrays
|
||||||
@ -152,7 +152,7 @@ __device__ void readBlock2(float* h_ptr_, int h_pitch_,
|
|||||||
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l);
|
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l);
|
||||||
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l);
|
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l);
|
||||||
|
|
||||||
for (int i=tx; i<block_width+4; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+4; i+=BLOCK_WIDTH) {
|
||||||
const int k = clamp(bx + i, 0, nx_+3); // Out of bounds
|
const int k = clamp(bx + i, 0, nx_+3); // Out of bounds
|
||||||
|
|
||||||
Q[0][j][i] = h_row[k];
|
Q[0][j][i] = h_row[k];
|
||||||
@ -171,7 +171,7 @@ __device__ void readBlock2(float* h_ptr_, int h_pitch_,
|
|||||||
__device__ void writeBlock1(float* h_ptr_, int h_pitch_,
|
__device__ void writeBlock1(float* h_ptr_, int h_pitch_,
|
||||||
float* hu_ptr_, int hu_pitch_,
|
float* hu_ptr_, int hu_pitch_,
|
||||||
float* hv_ptr_, int hv_pitch_,
|
float* hv_ptr_, int hv_pitch_,
|
||||||
float Q[3][block_height+2][block_width+2],
|
float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
const int nx_, const int ny_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -206,7 +206,7 @@ __device__ void writeBlock1(float* h_ptr_, int h_pitch_,
|
|||||||
__device__ void writeBlock2(float* h_ptr_, int h_pitch_,
|
__device__ void writeBlock2(float* h_ptr_, int h_pitch_,
|
||||||
float* hu_ptr_, int hu_pitch_,
|
float* hu_ptr_, int hu_pitch_,
|
||||||
float* hv_ptr_, int hv_pitch_,
|
float* hv_ptr_, int hv_pitch_,
|
||||||
float Q[3][block_height+4][block_width+4],
|
float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
const int nx_, const int ny_) {
|
const int nx_, const int ny_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -240,7 +240,7 @@ __device__ void writeBlock2(float* h_ptr_, int h_pitch_,
|
|||||||
* No flow boundary conditions for the shallow water equations
|
* No flow boundary conditions for the shallow water equations
|
||||||
* with one ghost cell in each direction
|
* with one ghost cell in each direction
|
||||||
*/
|
*/
|
||||||
__device__ void noFlowBoundary1(float Q[3][block_height+2][block_width+2], const int nx_, const int ny_) {
|
__device__ void noFlowBoundary1(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], const int nx_, const int ny_) {
|
||||||
//Global index
|
//Global index
|
||||||
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
const int ti = get_global_id(0) + 1; //Skip global ghost cells, i.e., +1
|
||||||
const int tj = get_global_id(1) + 1;
|
const int tj = get_global_id(1) + 1;
|
||||||
@ -284,7 +284,7 @@ __device__ void noFlowBoundary1(float Q[3][block_height+2][block_width+2], const
|
|||||||
* No flow boundary conditions for the shallow water equations
|
* No flow boundary conditions for the shallow water equations
|
||||||
* with two ghost cells in each direction
|
* with two ghost cells in each direction
|
||||||
*/
|
*/
|
||||||
__device__ void noFlowBoundary2(float Q[3][block_height+4][block_width+4], const int nx_, const int ny_) {
|
__device__ void noFlowBoundary2(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], const int nx_, const int ny_) {
|
||||||
//Global index
|
//Global index
|
||||||
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
const int ti = get_global_id(0) + 2; //Skip global ghost cells, i.e., +2
|
||||||
const int tj = get_global_id(1) + 2;
|
const int tj = get_global_id(1) + 2;
|
||||||
@ -342,8 +342,8 @@ __device__ void noFlowBoundary2(float Q[3][block_height+4][block_width+4], const
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the x axis (dimensional splitting)
|
* Evolves the solution in time along the x axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
__device__ void evolveF1(float Q[3][block_height+2][block_width+2],
|
__device__ void evolveF1(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float F[3][block_height+1][block_width+1],
|
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dx_, const float dt_) {
|
const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -372,8 +372,8 @@ __device__ void evolveF1(float Q[3][block_height+2][block_width+2],
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the x axis (dimensional splitting)
|
* Evolves the solution in time along the x axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
__device__ void evolveF2(float Q[3][block_height+4][block_width+4],
|
__device__ void evolveF2(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float F[3][block_height+1][block_width+1],
|
float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dx_, const float dt_) {
|
const float dx_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -402,8 +402,8 @@ __device__ void evolveF2(float Q[3][block_height+4][block_width+4],
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the y axis (dimensional splitting)
|
* Evolves the solution in time along the y axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
__device__ void evolveG1(float Q[3][block_height+2][block_width+2],
|
__device__ void evolveG1(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
float G[3][block_height+1][block_width+1],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dy_, const float dt_) {
|
const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
@ -433,8 +433,8 @@ __device__ void evolveG1(float Q[3][block_height+2][block_width+2],
|
|||||||
/**
|
/**
|
||||||
* Evolves the solution in time along the y axis (dimensional splitting)
|
* Evolves the solution in time along the y axis (dimensional splitting)
|
||||||
*/
|
*/
|
||||||
__device__ void evolveG2(float Q[3][block_height+4][block_width+4],
|
__device__ void evolveG2(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float G[3][block_height+1][block_width+1],
|
float G[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
|
||||||
const int nx_, const int ny_,
|
const int nx_, const int ny_,
|
||||||
const float dy_, const float dt_) {
|
const float dy_, const float dt_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
|
@ -46,8 +46,8 @@ __device__ __inline__ float minmodSlope(float left, float center, float right, f
|
|||||||
/**
|
/**
|
||||||
* Reconstructs a minmod slope for a whole block along the abscissa
|
* Reconstructs a minmod slope for a whole block along the abscissa
|
||||||
*/
|
*/
|
||||||
__device__ void minmodSlopeX(float Q[3][block_height+4][block_width+4],
|
__device__ void minmodSlopeX(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float Qx[3][block_height+2][block_width+2],
|
float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
const float theta_) {
|
const float theta_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
@ -57,7 +57,7 @@ __device__ void minmodSlopeX(float Q[3][block_height+4][block_width+4],
|
|||||||
{
|
{
|
||||||
const int j = ty;
|
const int j = ty;
|
||||||
const int l = j + 2; //Skip ghost cells
|
const int l = j + 2; //Skip ghost cells
|
||||||
for (int i=tx; i<block_width+2; i+=block_width) {
|
for (int i=tx; i<BLOCK_WIDTH+2; i+=BLOCK_WIDTH) {
|
||||||
const int k = i + 1;
|
const int k = i + 1;
|
||||||
for (int p=0; p<3; ++p) {
|
for (int p=0; p<3; ++p) {
|
||||||
Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_);
|
Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_);
|
||||||
@ -70,14 +70,14 @@ __device__ void minmodSlopeX(float Q[3][block_height+4][block_width+4],
|
|||||||
/**
|
/**
|
||||||
* Reconstructs a minmod slope for a whole block along the ordinate
|
* Reconstructs a minmod slope for a whole block along the ordinate
|
||||||
*/
|
*/
|
||||||
__device__ void minmodSlopeY(float Q[3][block_height+4][block_width+4],
|
__device__ void minmodSlopeY(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
|
||||||
float Qy[3][block_height+2][block_width+2],
|
float Qy[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
|
||||||
const float theta_) {
|
const float theta_) {
|
||||||
//Index of thread within block
|
//Index of thread within block
|
||||||
const int tx = get_local_id(0);
|
const int tx = get_local_id(0);
|
||||||
const int ty = get_local_id(1);
|
const int ty = get_local_id(1);
|
||||||
|
|
||||||
for (int j=ty; j<block_height+2; j+=block_height) {
|
for (int j=ty; j<BLOCK_HEIGHT+2; j+=BLOCK_HEIGHT) {
|
||||||
const int l = j + 1;
|
const int l = j + 1;
|
||||||
{
|
{
|
||||||
const int i = tx;
|
const int i = tx;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user