diff --git a/GPUSimulators/cuda/SWE2D_FORCE.cu b/GPUSimulators/cuda/SWE2D_FORCE.cu index 1db7b17..42bb364 100644 --- a/GPUSimulators/cuda/SWE2D_FORCE.cu +++ b/GPUSimulators/cuda/SWE2D_FORCE.cu @@ -28,27 +28,18 @@ along with this program. If not, see . */ __device__ void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], - float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], + float F[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], const float g_, const float dx_, const float dt_) { - - //Index of thread within block - const int tx = threadIdx.x; - const int ty = threadIdx.y; - //Compute fluxes along the x axis - { - int j=ty; - const int l = j + 1; //Skip ghost cells - for (int i=tx; i( h0_ptr_, h0_pitch_, Q[0], nx_+2, ny_+2); diff --git a/GPUSimulators/cuda/SWE2D_HLL.cu b/GPUSimulators/cuda/SWE2D_HLL.cu index c05552c..6e2c4ff 100644 --- a/GPUSimulators/cuda/SWE2D_HLL.cu +++ b/GPUSimulators/cuda/SWE2D_HLL.cu @@ -31,24 +31,20 @@ along with this program. If not, see . */ __device__ void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], - float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], + float F[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], const float g_) { - //Index of thread within block - const int tx = threadIdx.x; - const int ty = threadIdx.y; - - { - const int j=ty; - const int l = j + 1; //Skip ghost cells - for (int i=tx; i( h0_ptr_, h0_pitch_, Q[0], nx_+2, ny_+2); diff --git a/GPUSimulators/cuda/SWE2D_HLL2.cu b/GPUSimulators/cuda/SWE2D_HLL2.cu index 1b55c86..363710b 100644 --- a/GPUSimulators/cuda/SWE2D_HLL2.cu +++ b/GPUSimulators/cuda/SWE2D_HLL2.cu @@ -33,33 +33,26 @@ along with this program. If not, see . */ __device__ void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], - float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], - float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], + float Qx[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], + float F[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], const float g_, const float dx_, const float dt_) { - //Index of thread within block - const int tx = threadIdx.x; - const int ty = threadIdx.y; - - { - const int j=ty; - const int l = j + 2; //Skip ghost cells - for (int i=tx; i( h0_ptr_, h0_pitch_, Q[0], nx_+2, ny_+2); diff --git a/GPUSimulators/cuda/SWE2D_KP07.cu b/GPUSimulators/cuda/SWE2D_KP07.cu index 3740776..4565cdf 100644 --- a/GPUSimulators/cuda/SWE2D_KP07.cu +++ b/GPUSimulators/cuda/SWE2D_KP07.cu @@ -96,6 +96,39 @@ void computeFluxG(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], +__device__ void minmodSlopeX(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], + float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], + const float theta_) { + //Reconstruct slopes along x axis + for (int p=0; p<3; ++p) { + { + const int j = threadIdx.y+2; + for (int i=threadIdx.x+1; i(Q, Qx, theta_); - minmodSlopeY(Q, Qy, theta_); + minmodSlopeX(Q, Qx, theta_); + minmodSlopeY(Q, Qy, theta_); __syncthreads(); diff --git a/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu b/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu index 385e8c8..be50059 100644 --- a/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu +++ b/GPUSimulators/cuda/SWE2D_KP07_dimsplit.cu @@ -31,77 +31,65 @@ along with this program. If not, see . __device__ void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], - float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], - float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], + float Qx[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], + float F[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], const float g_, const float dx_, const float dt_) { - //Index of thread within block - const int tx = threadIdx.x; - const int ty = threadIdx.y; - - int j=ty; - const int l = j + 2; //Skip ghost cells - for (int i=tx; i. */ __device__ void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], - float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], + float F[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], const float g_, const float dx_, const float dt_) { - //Index of thread within block - const int tx = threadIdx.x; - const int ty = threadIdx.y; - - { - int j=ty; - const int l = j + 2; //Skip ghost cells - for (int i=tx; i