diff --git a/GPUSimulators/cuda/SWE2D_FORCE.cu b/GPUSimulators/cuda/SWE2D_FORCE.cu
index 1db7b17..42bb364 100644
--- a/GPUSimulators/cuda/SWE2D_FORCE.cu
+++ b/GPUSimulators/cuda/SWE2D_FORCE.cu
@@ -28,27 +28,18 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
__device__
void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
- float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
+ float F[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
const float g_, const float dx_, const float dt_) {
-
- //Index of thread within block
- const int tx = threadIdx.x;
- const int ty = threadIdx.y;
-
//Compute fluxes along the x axis
- {
- int j=ty;
- const int l = j + 1; //Skip ghost cells
- for (int i=tx; i( h0_ptr_, h0_pitch_, Q[0], nx_+2, ny_+2);
diff --git a/GPUSimulators/cuda/SWE2D_HLL.cu b/GPUSimulators/cuda/SWE2D_HLL.cu
index c05552c..6e2c4ff 100644
--- a/GPUSimulators/cuda/SWE2D_HLL.cu
+++ b/GPUSimulators/cuda/SWE2D_HLL.cu
@@ -31,24 +31,20 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
__device__
void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
- float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
+ float F[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
const float g_) {
- //Index of thread within block
- const int tx = threadIdx.x;
- const int ty = threadIdx.y;
-
- {
- const int j=ty;
- const int l = j + 1; //Skip ghost cells
- for (int i=tx; i( h0_ptr_, h0_pitch_, Q[0], nx_+2, ny_+2);
diff --git a/GPUSimulators/cuda/SWE2D_HLL2.cu b/GPUSimulators/cuda/SWE2D_HLL2.cu
index 1b55c86..363710b 100644
--- a/GPUSimulators/cuda/SWE2D_HLL2.cu
+++ b/GPUSimulators/cuda/SWE2D_HLL2.cu
@@ -33,33 +33,26 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
__device__
void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
- float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
- float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
+ float Qx[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
+ float F[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
const float g_, const float dx_, const float dt_) {
- //Index of thread within block
- const int tx = threadIdx.x;
- const int ty = threadIdx.y;
-
- {
- const int j=ty;
- const int l = j + 2; //Skip ghost cells
- for (int i=tx; i( h0_ptr_, h0_pitch_, Q[0], nx_+2, ny_+2);
diff --git a/GPUSimulators/cuda/SWE2D_KP07.cu b/GPUSimulators/cuda/SWE2D_KP07.cu
index 3740776..4565cdf 100644
--- a/GPUSimulators/cuda/SWE2D_KP07.cu
+++ b/GPUSimulators/cuda/SWE2D_KP07.cu
@@ -96,6 +96,39 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
+__device__ void minmodSlopeX(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
+ float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
+ const float theta_) {
+ //Reconstruct slopes along x axis
+ for (int p=0; p<3; ++p) {
+ {
+ const int j = threadIdx.y+2;
+ for (int i=threadIdx.x+1; i(Q, Qx, theta_);
- minmodSlopeY(Q, Qy, theta_);
+ minmodSlopeX(Q, Qx, theta_);
+ minmodSlopeY(Q, Qy, theta_);
__syncthreads();
diff --git a/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu b/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu
index 385e8c8..be50059 100644
--- a/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu
+++ b/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu
@@ -31,77 +31,65 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
__device__
void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
- float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2],
- float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
+ float Qx[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
+ float F[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
const float g_, const float dx_, const float dt_) {
- //Index of thread within block
- const int tx = threadIdx.x;
- const int ty = threadIdx.y;
-
- int j=ty;
- const int l = j + 2; //Skip ghost cells
- for (int i=tx; i.
*/
__device__
void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
- float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1],
+ float F[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
const float g_, const float dx_, const float dt_) {
- //Index of thread within block
- const int tx = threadIdx.x;
- const int ty = threadIdx.y;
-
- {
- int j=ty;
- const int l = j + 2; //Skip ghost cells
- for (int i=tx; i