mirror of
https://github.com/smyalygames/FiniteVolumeGPU_HIP.git
synced 2025-12-24 13:29:17 +01:00
hip-python implementation
This commit is contained in:
@@ -24,6 +24,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include <float.h>
|
||||
|
||||
/**
|
||||
* Float3 operators
|
||||
@@ -86,9 +88,6 @@ __device__ float desingularize(float x_, float eps_) {
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Returns the step stored in the leftmost 16 bits
|
||||
* of the 32 bit step-order integer
|
||||
@@ -497,14 +496,18 @@ __device__ void memset(float Q[vars][shmem_height][shmem_width], float value) {
|
||||
|
||||
|
||||
template <unsigned int threads>
|
||||
__device__ void reduce_max(float* data, unsigned int n) {
|
||||
//__device__ void reduce_max(float* data, unsigned int n) {
|
||||
__device__ float reduce_max(float* data, unsigned int n) {
|
||||
__shared__ float sdata[threads];
|
||||
unsigned int tid = threadIdx.x;
|
||||
|
||||
//Reduce to "threads" elements
|
||||
sdata[tid] = FLT_MIN;
|
||||
for (unsigned int i=tid; i<n; i += threads) {
|
||||
sdata[tid] = max(sdata[tid], dt_ctx.L[i]);
|
||||
|
||||
//sdata[tid] = max(sdata[tid], dt_ctx.L[i]);
|
||||
sdata[tid] = max(sdata[tid], data[i]);
|
||||
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user