hip-python implementation

This commit is contained in:
Hicham Agueny
2024-06-09 22:48:06 +02:00
parent d5601ec808
commit 2a7a8c6258
23 changed files with 1769 additions and 1419 deletions

View File

@@ -25,7 +25,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "EulerCommon.h"
#include "limiters.h"
__device__
void computeFluxF(float Q[4][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
float Qx[4][BLOCK_HEIGHT+4][BLOCK_WIDTH+4],
@@ -248,4 +247,4 @@ __global__ void KP07DimsplitKernel(
}
} // extern "C"
} // extern "C"

View File

@@ -24,6 +24,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#pragma once
#include <stddef.h>
#include <float.h>
/**
* Float3 operators
@@ -86,9 +88,6 @@ __device__ float desingularize(float x_, float eps_) {
/**
* Returns the step stored in the leftmost 16 bits
* of the 32 bit step-order integer
@@ -497,14 +496,18 @@ __device__ void memset(float Q[vars][shmem_height][shmem_width], float value) {
template <unsigned int threads>
__device__ void reduce_max(float* data, unsigned int n) {
//__device__ void reduce_max(float* data, unsigned int n) {
__device__ float reduce_max(float* data, unsigned int n) {
__shared__ float sdata[threads];
unsigned int tid = threadIdx.x;
//Reduce to "threads" elements
sdata[tid] = FLT_MIN;
for (unsigned int i=tid; i<n; i += threads) {
sdata[tid] = max(sdata[tid], dt_ctx.L[i]);
//sdata[tid] = max(sdata[tid], dt_ctx.L[i]);
sdata[tid] = max(sdata[tid], data[i]);
}
__syncthreads();