Removed get_local_size

This commit is contained in:
André R. Brodtkorb 2018-07-25 16:40:49 +02:00
parent a0f429148c
commit 8c431d2a7d
8 changed files with 127 additions and 177 deletions

File diff suppressed because one or more lines are too long

View File

@ -35,9 +35,10 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
const int ty = get_local_id(1); const int ty = get_local_id(1);
//Compute fluxes along the x axis //Compute fluxes along the x axis
for (int j=ty; j<block_height; j+=get_local_size(1)) { {
int j=ty;
const int l = j + 1; //Skip ghost cells const int l = j + 1; //Skip ghost cells
for (int i=tx; i<block_width+1; i+=get_local_size(0)) { for (int i=tx; i<block_width+1; i+=block_width) {
const int k = i; const int k = i;
// Q at interface from the right and left // Q at interface from the right and left
@ -70,9 +71,10 @@ void computeFluxG(float Q[3][block_height+2][block_width+2],
const int ty = get_local_id(1); const int ty = get_local_id(1);
//Compute fluxes along the y axis //Compute fluxes along the y axis
for (int j=ty; j<block_height+1; j+=get_local_size(1)) { for (int j=ty; j<block_height+1; j+=block_height) {
const int l = j; const int l = j;
for (int i=tx; i<block_width; i+=get_local_size(0)) { {
int i=tx;
const int k = i + 1; //Skip ghost cells const int k = i + 1; //Skip ghost cells
// Q at interface from the right and left // Q at interface from the right and left

View File

@ -38,9 +38,10 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height; j+=get_local_size(1)) { {
const int j=ty;
const int l = j + 2; //Skip ghost cells const int l = j + 2; //Skip ghost cells
for (int i=tx; i<block_width+1; i+=get_local_size(0)) { for (int i=tx; i<block_width+1; i+=block_width) {
const int k = i + 1; const int k = i + 1;
// Reconstruct point values of Q at the left and right hand side // Reconstruct point values of Q at the left and right hand side
// of the cell for both the left (i) and right (i+1) cell // of the cell for both the left (i) and right (i+1) cell
@ -89,9 +90,10 @@ void computeFluxG(float Q[3][block_height+4][block_width+4],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height+1; j+=get_local_size(1)) { for (int j=ty; j<block_height+1; j+=block_height) {
const int l = j + 1; const int l = j + 1;
for (int i=tx; i<block_width; i+=get_local_size(0)) { {
int i=tx;
const int k = i + 2; //Skip ghost cells const int k = i + 2; //Skip ghost cells
// Reconstruct point values of Q at the left and right hand side // Reconstruct point values of Q at the left and right hand side
// of the cell for both the left (i) and right (i+1) cell // of the cell for both the left (i) and right (i+1) cell

View File

@ -36,9 +36,10 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height; j+=get_local_size(1)) { {
const int j=ty;
const int l = j + 1; //Skip ghost cells const int l = j + 1; //Skip ghost cells
for (int i=tx; i<block_width+1; i+=get_local_size(0)) { for (int i=tx; i<block_width+1; i+=block_width) {
const int k = i; const int k = i;
const float3 Q_l = make_float3(Q[0][l][k ], Q[1][l][k ], Q[2][l][k ]); const float3 Q_l = make_float3(Q[0][l][k ], Q[1][l][k ], Q[2][l][k ]);
@ -69,9 +70,10 @@ void computeFluxG(float Q[3][block_height+2][block_width+2],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height+1; j+=get_local_size(1)) { for (int j=ty; j<block_height+1; j+=block_height) {
const int l = j; const int l = j;
for (int i=tx; i<block_width; i+=get_local_size(0)) { {
const int i=tx;
const int k = i + 1; //Skip ghost cells const int k = i + 1; //Skip ghost cells
//Note that hu and hv are swapped ("transposing" the domain)! //Note that hu and hv are swapped ("transposing" the domain)!

View File

@ -36,9 +36,10 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height; j+=get_local_size(1)) { {
int j=ty;
const int l = j + 2; //Skip ghost cells const int l = j + 2; //Skip ghost cells
for (int i=tx; i<block_width+1; i+=get_local_size(0)) { for (int i=tx; i<block_width+1; i+=block_width) {
const int k = i + 1; const int k = i + 1;
// Reconstruct point values of Q at the left and right hand side // Reconstruct point values of Q at the left and right hand side
// of the cell for both the left (i) and right (i+1) cell // of the cell for both the left (i) and right (i+1) cell
@ -80,9 +81,10 @@ void computeFluxG(float Q[3][block_height+4][block_width+4],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height+1; j+=get_local_size(1)) { for (int j=ty; j<block_height+1; j+=block_height) {
const int l = j + 1; const int l = j + 1;
for (int i=tx; i<block_width; i+=get_local_size(0)) { {
int i=tx;
const int k = i + 2; //Skip ghost cells const int k = i + 2; //Skip ghost cells
// Reconstruct point values of Q at the left and right hand side // Reconstruct point values of Q at the left and right hand side
// of the cell for both the left (i) and right (i+1) cell // of the cell for both the left (i) and right (i+1) cell

View File

@ -36,9 +36,10 @@ void computeFluxF(float Q[3][block_height+4][block_width+4],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height; j+=get_local_size(1)) { {
int j=ty;
const int l = j + 2; //Skip ghost cells const int l = j + 2; //Skip ghost cells
for (int i=tx; i<block_width+1; i+=get_local_size(0)) { for (int i=tx; i<block_width+1; i+=block_width) {
const int k = i + 1; const int k = i + 1;
// Q at interface from the right and left // Q at interface from the right and left
const float3 Qp = make_float3(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1], const float3 Qp = make_float3(Q[0][l][k+1] - 0.5f*Qx[0][j][i+1],
@ -66,9 +67,10 @@ void computeFluxG(float Q[3][block_height+4][block_width+4],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height+1; j+=get_local_size(1)) { for (int j=ty; j<block_height+1; j+=block_height) {
const int l = j + 1; const int l = j + 1;
for (int i=tx; i<block_width; i+=get_local_size(0)) { {
int i=tx;
const int k = i + 2; //Skip ghost cells const int k = i + 2; //Skip ghost cells
// Q at interface from the right and left // Q at interface from the right and left
// Note that we swap hu and hv // Note that we swap hu and hv

View File

@ -33,9 +33,10 @@ void computeFluxF(float Q[3][block_height+2][block_width+2],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height; j+=get_local_size(1)) { {
const int j=ty;
const int l = j + 1; //Skip ghost cells const int l = j + 1; //Skip ghost cells
for (int i=tx; i<block_width+1; i+=get_local_size(0)) { for (int i=tx; i<block_width+1; i+=block_width) {
const int k = i; const int k = i;
// Q at interface from the right and left // Q at interface from the right and left
@ -67,9 +68,10 @@ void computeFluxG(float Q[3][block_height+2][block_width+2],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height+1; j+=get_local_size(1)) { for (int j=ty; j<block_height+1; j+=block_height) {
const int l = j; const int l = j;
for (int i=tx; i<block_width; i+=get_local_size(0)) { {
const int i=tx;
const int k = i + 1; //Skip ghost cells const int k = i + 1; //Skip ghost cells
// Q at interface from the right and left // Q at interface from the right and left

View File

@ -60,17 +60,6 @@ __device__ int get_global_id(int dim) {
} }
/**
 * OpenCL-named helper that reports the thread-block extent along one axis.
 *
 * @param dim Axis selector: 0 reads blockDim.x, 1 reads blockDim.y,
 *            2 reads blockDim.z.
 * @return The selected blockDim component converted to int, or -1 when
 *         dim is not 0, 1 or 2.
 */
__device__ int get_local_size(int dim) {
    // Each recognised axis returns immediately.
    if (dim == 0) {
        return blockDim.x;
    }
    if (dim == 1) {
        return blockDim.y;
    }
    if (dim == 2) {
        return blockDim.z;
    }

    // Any other axis index is reported with the -1 sentinel.
    return -1;
}
/** /**
* Float3 operators * Float3 operators
*/ */
@ -111,11 +100,11 @@ __device__ void readBlock1(float* h_ptr_, int h_pitch_,
const int ty = get_local_id(1); const int ty = get_local_id(1);
//Index of block within domain //Index of block within domain
const int bx = get_local_size(0) * get_group_id(0); const int bx = block_width * get_group_id(0);
const int by = get_local_size(1) * get_group_id(1); const int by = block_height * get_group_id(1);
//Read into shared memory //Read into shared memory
for (int j=ty; j<block_height+2; j+=get_local_size(1)) { for (int j=ty; j<block_height+2; j+=block_height) {
const int l = clamp(by + j, 0, ny_+1); // Out of bounds const int l = clamp(by + j, 0, ny_+1); // Out of bounds
//Compute the pointer to current row in the arrays //Compute the pointer to current row in the arrays
@ -123,7 +112,7 @@ __device__ void readBlock1(float* h_ptr_, int h_pitch_,
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l); float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l);
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l); float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l);
for (int i=tx; i<block_width+2; i+=get_local_size(0)) { for (int i=tx; i<block_width+2; i+=block_width) {
const int k = clamp(bx + i, 0, nx_+1); // Out of bounds const int k = clamp(bx + i, 0, nx_+1); // Out of bounds
Q[0][j][i] = h_row[k]; Q[0][j][i] = h_row[k];
@ -150,11 +139,11 @@ __device__ void readBlock2(float* h_ptr_, int h_pitch_,
const int ty = get_local_id(1); const int ty = get_local_id(1);
//Index of block within domain //Index of block within domain
const int bx = get_local_size(0) * get_group_id(0); const int bx = block_width * get_group_id(0);
const int by = get_local_size(1) * get_group_id(1); const int by = block_height * get_group_id(1);
//Read into shared memory //Read into shared memory
for (int j=ty; j<block_height+4; j+=get_local_size(1)) { for (int j=ty; j<block_height+4; j+=block_height) {
const int l = clamp(by + j, 0, ny_+3); // Out of bounds const int l = clamp(by + j, 0, ny_+3); // Out of bounds
//Compute the pointer to current row in the arrays //Compute the pointer to current row in the arrays
@ -162,7 +151,7 @@ __device__ void readBlock2(float* h_ptr_, int h_pitch_,
float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l); float* const hu_row = (float*) ((char*) hu_ptr_ + hu_pitch_*l);
float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l); float* const hv_row = (float*) ((char*) hv_ptr_ + hv_pitch_*l);
for (int i=tx; i<block_width+4; i+=get_local_size(0)) { for (int i=tx; i<block_width+4; i+=block_width) {
const int k = clamp(bx + i, 0, nx_+3); // Out of bounds const int k = clamp(bx + i, 0, nx_+3); // Out of bounds
Q[0][j][i] = h_row[k]; Q[0][j][i] = h_row[k];
@ -504,9 +493,10 @@ __device__ void minmodSlopeX(float Q[3][block_height+4][block_width+4],
const int ty = get_local_id(1); const int ty = get_local_id(1);
//Reconstruct slopes along x axis //Reconstruct slopes along x axis
for (int j=ty; j<block_height; j+=get_local_size(1)) { {
const int j = ty;
const int l = j + 2; //Skip ghost cells const int l = j + 2; //Skip ghost cells
for (int i=tx; i<block_width+2; i+=get_local_size(0)) { for (int i=tx; i<block_width+2; i+=block_width) {
const int k = i + 1; const int k = i + 1;
for (int p=0; p<3; ++p) { for (int p=0; p<3; ++p) {
Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_); Qx[p][j][i] = minmodSlope(Q[p][l][k-1], Q[p][l][k], Q[p][l][k+1], theta_);
@ -526,9 +516,10 @@ __device__ void minmodSlopeY(float Q[3][block_height+4][block_width+4],
const int tx = get_local_id(0); const int tx = get_local_id(0);
const int ty = get_local_id(1); const int ty = get_local_id(1);
for (int j=ty; j<block_height+2; j+=get_local_size(1)) { for (int j=ty; j<block_height+2; j+=block_height) {
const int l = j + 1; const int l = j + 1;
for (int i=tx; i<block_width; i+=get_local_size(0)) { {
const int i = tx;
const int k = i + 2; //Skip ghost cells const int k = i + 2; //Skip ghost cells
for (int p=0; p<3; ++p) { for (int p=0; p<3; ++p) {
Qy[p][j][i] = minmodSlope(Q[p][l-1][k], Q[p][l][k], Q[p][l+1][k], theta_); Qy[p][j][i] = minmodSlope(Q[p][l-1][k], Q[p][l][k], Q[p][l+1][k], theta_);