# -*- coding: utf-8 -*-

"""
This Python module implements the helper functions and classes
used for autotuning the simulator block sizes.

Copyright (C) 2018 SINTEF ICT

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import os
import gc
import numpy as np
import logging
from socket import gethostname

from tqdm.auto import tqdm

import pycuda.driver as cuda

from GPUSimulators import Simulator
from GPUSimulators.common import common, Timer
from GPUSimulators.gpu import CudaContext


def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
    """
    Runs a benchmark and returns the achieved performance in megacells per second.
    """

    logger = logging.getLogger(__name__)

    # Initialize simulator
    try:
        sim = simulator(**arguments)
    except Exception:
        # An exception was raised - not possible to continue
        logger.debug(f"Failed creating {simulator.__name__} with arguments {str(arguments)}")
        # raise RuntimeError("Failed creating %s with arguments %s", simulator.__name__, str(arguments))
        return np.nan

    # Create timer events
    start = cuda.Event()
    end = cuda.Event()

    # Warmup
    for i in range(warmup_timesteps):
        sim.substep(sim.dt, i)

    # Run simulation with timer
    start.record(sim.stream)
    for i in range(timesteps):
        sim.substep(sim.dt, i)
    end.record(sim.stream)

    # Synchronize end event
    end.synchronize()

    # Compute megacells per second (time_since returns milliseconds)
    gpu_elapsed = end.time_since(start) * 1.0e-3
    megacells = (sim.nx * sim.ny * timesteps / (1000 * 1000)) / gpu_elapsed
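    # For example, a 2048 x 2048 grid advanced 10 timesteps in 0.05 s of GPU time
    # corresponds to (2048 * 2048 * 10 / 1e6) / 0.05, i.e. roughly 839 megacells per second.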

    # Sanity check solution
    h, hu, hv = sim.download()
    sane = True
    sane = sane and sanity_check(h, 0.3, 0.7)
    sane = sane and sanity_check(hu, -0.2, 0.2)
    sane = sane and sanity_check(hv, -0.2, 0.2)

    if sane:
        logger.debug("%s [%d x %d] succeeded: %f megacells, gpu elapsed %f", simulator.__name__,
                     arguments["block_width"], arguments["block_height"], megacells, gpu_elapsed)
        return megacells
    else:
        logger.debug("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"],
                     arguments["block_height"], gpu_elapsed)
        # raise RuntimeError("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed)
        return np.nan


def gen_test_data(nx, ny, g):
    """
    Generates a test dataset
    """

    width = 100.0
    height = 100.0
    dx = width / float(nx)
    dy = height / float(ny)

    x_center = dx * nx / 2.0
    y_center = dy * ny / 2.0

    # Create a smooth (cosine-shaped) "dam break" that will not form shocks
    size = width / 5.0
    dt = 10 ** 10

    h = np.zeros((ny, nx), dtype=np.float32)
    hu = np.zeros((ny, nx), dtype=np.float32)
    hv = np.zeros((ny, nx), dtype=np.float32)

    extent = 1.0 / np.sqrt(2.0)
    x = (dx * (np.arange(0, nx, dtype=np.float32) + 0.5) - x_center) / size
    y = (dy * (np.arange(0, ny, dtype=np.float32) + 0.5) - y_center) / size
    xv, yv = np.meshgrid(x, y, sparse=False, indexing='xy')
    r = np.minimum(1.0, np.sqrt(xv ** 2 + yv ** 2))
    xv = None
    yv = None
    gc.collect()

    # Generate the (high-resolution) initial condition
    cos = np.cos(np.pi * r)
    h = 0.5 + 0.1 * 0.5 * (1.0 + cos)
    hu = 0.1 * 0.5 * (1.0 + cos)
    hv = hu.copy()

    scale = 0.7
    max_h_estimate = 0.6
    max_u_estimate = 0.1 * np.sqrt(2.0)
    dx = width / nx
    dy = height / ny
    dt = scale * min(dx, dy) / (max_u_estimate + np.sqrt(g * max_h_estimate))
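    # This is a CFL-type restriction based on the estimated maximum wave speed
    # |u| + sqrt(g * h). With the Autotuner defaults (nx = ny = 2048, so dx = dy ~ 0.0488)
    # and g = 9.81, this gives dt ~ 0.7 * 0.0488 / (0.141 + 2.43), roughly 0.013.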

    return h, hu, hv, dx, dy, dt


def sanity_check(variable, bound_min, bound_max):
    """
    Checks that a variable is "sane"
    """

    maxval = np.amax(variable)
    minval = np.amin(variable)
    if (np.isnan(maxval)
            or np.isnan(minval)
            or maxval > bound_max
            or minval < bound_min):
        return False
    else:
        return True
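
# Illustrative use: sanity_check(h, 0.3, 0.7) is True only when h contains no NaNs
# and all of its values lie within [0.3, 0.7] (the bounds run_benchmark uses for
# the water depth).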


def benchmark_single_simulator(simulator, arguments, block_widths, block_heights):
    """
    Runs a set of benchmarks for a single simulator
    """

    logger = logging.getLogger(__name__)

    megacells = np.empty((len(block_heights), len(block_widths)))
    megacells.fill(np.nan)

    logger.debug("Running %d benchmarks with %s", len(block_heights) * len(block_widths), simulator.__name__)

    sim_arguments = arguments.copy()

    with Timer(simulator.__name__) as t:
        for j, block_height in enumerate(tqdm(block_heights, desc='Autotuner Progress')):
            sim_arguments.update({'block_height': block_height})
            for i, block_width in enumerate(tqdm(block_widths, desc=f'Iteration {j} Progress', leave=False)):
                sim_arguments.update({'block_width': block_width})
                megacells[j, i] = run_benchmark(simulator, sim_arguments)

    logger.debug("Completed %s in %f seconds", simulator.__name__, t.secs)

    return megacells


class Autotuner:
    def __init__(self,
                 nx=2048, ny=2048,
                 block_widths=range(8, 32, 1),
                 block_heights=range(8, 32, 1)):
        logger = logging.getLogger(__name__)
        self.filename = f"autotuning_data_{gethostname()}.npz"
        self.nx = nx
        self.ny = ny
        self.block_widths = block_widths
        self.block_heights = block_heights
        # Cache of peak-performance results, keyed by simulator class name
        self.performance = {}

    def benchmark(self, simulator, force=False):
        logger = logging.getLogger(__name__)

        # Run through simulators and benchmark
        key = str(simulator.__name__)
        logger.info(f"Benchmarking {key} to {self.filename}")

        # If this simulator has been benchmarked already, skip it
        if not force and os.path.isfile(self.filename):
            with np.load(self.filename) as data:
                if key in data["simulators"]:
                    logger.info(f"{key} already benchmarked - skipping")
                    return

        # Set arguments to send to the simulators during construction
        context = CudaContext(autotuning=False)
        g = 9.81
        h0, hu0, hv0, dx, dy, dt = gen_test_data(nx=self.nx, ny=self.ny, g=g)
        arguments = {
            'context': context,
            'h0': h0, 'hu0': hu0, 'hv0': hv0,
            'nx': self.nx, 'ny': self.ny,
            'dx': dx, 'dy': dy, 'dt': 0.9 * dt,
            'g': g,
            'compile_opts': ['-Wno-deprecated-gpu-targets']
        }
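        # Note: dt is scaled by 0.9 here, presumably as an extra safety margin on
        # top of the CFL-limited estimate returned by gen_test_data.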

        # Load existing data into memory
        benchmark_data = {
            "simulators": [],
        }
        if os.path.isfile(self.filename):
            with np.load(self.filename) as data:
                for k, v in data.items():
                    benchmark_data[k] = v

        # Run benchmark
        benchmark_data[key + "_megacells"] = benchmark_single_simulator(simulator, arguments,
                                                                        self.block_widths, self.block_heights)
        benchmark_data[key + "_block_widths"] = self.block_widths
        benchmark_data[key + "_block_heights"] = self.block_heights
        benchmark_data[key + "_arguments"] = str(arguments)

        existing_sims = benchmark_data["simulators"]
        if isinstance(existing_sims, np.ndarray):
            existing_sims = existing_sims.tolist()
        if key not in existing_sims:
            benchmark_data["simulators"] = existing_sims + [key]

        # Save to file
        np.savez_compressed(self.filename, **benchmark_data)
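        # The saved .npz file then contains 'simulators' (the list of benchmarked
        # class names) and, for each simulator <key>: '<key>_megacells',
        # '<key>_block_widths', '<key>_block_heights' and '<key>_arguments'.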

    def get_peak_performance(self, simulator):
        """
        Reads the NumPy file with autotuning data and reports
        the maximum performance and the corresponding block size.
        """

        logger = logging.getLogger(__name__)

        assert issubclass(simulator, Simulator.BaseSimulator)
        key = simulator.__name__

        if key in self.performance:
            return self.performance[key]
        else:
            # Run the benchmark if required
            if not os.path.isfile(self.filename):
                logger.debug(f"Could not get autotuned peak performance for {key}: benchmarking")
                self.benchmark(simulator)

            with np.load(self.filename) as data:
                if key not in data['simulators']:
                    logger.debug(f"Could not get autotuned peak performance for {key}: benchmarking")
                    data.close()
                    self.benchmark(simulator)
                    # Re-open the freshly written benchmark file
                    data = np.load(self.filename)

                def find_max_index(megacells):
                    max_index = np.nanargmax(megacells)
                    return np.unravel_index(max_index, megacells.shape)

                megacells = data[key + '_megacells']
                block_widths = data[key + '_block_widths']
                block_heights = data[key + '_block_heights']
                j, i = find_max_index(megacells)

                self.performance[key] = {"block_width": block_widths[i],
                                         "block_height": block_heights[j],
                                         "megacells": megacells[j, i]}
                logger.debug(f"Returning {self.performance[key]} as peak performance parameters")
                return self.performance[key]

        # This should never happen
        raise RuntimeError("Something wrong: Could not get autotuning data!")
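

# Minimal usage sketch (illustrative, not part of the original module). The name
# `MySimulator` is a placeholder for any concrete subclass of Simulator.BaseSimulator
# provided by this package:
#
#     tuner = Autotuner(nx=2048, ny=2048)
#     peak = tuner.get_peak_performance(MySimulator)
#     # peak == {'block_width': ..., 'block_height': ..., 'megacells': ...}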