14 Commits

Author SHA1 Message Date
Anthony Berg
6d9f36968d Merge remote-tracking branch 'origin/build/rocm-upgrade' into build/rocm-upgrade 2025-03-30 18:40:46 +02:00
Anthony Berg
5b925cdb42 refactor: change MPI functions into variables 2025-03-30 18:40:38 +02:00
Anthony Berg
b054a4dbcd Delete GPUSimulators/helpers/__pycache__ directory 2025-03-30 18:22:38 +02:00
Anthony Berg
2e5cf88eef Merge remote-tracking branch 'origin/build/rocm-upgrade' into build/rocm-upgrade
# Conflicts:
#	GPUSimulators/Simulator.py
2025-03-30 17:45:16 +02:00
Anthony Berg
80afd31286 refactor: change how variables are called in for loop 2025-03-30 17:44:33 +02:00
Anthony Berg
e2306406a7 fix: floating point number practically causing an infinite loop 2025-03-30 17:43:52 +02:00
Anthony Berg
aa21733806 fix: floating point number practically causing an infinite loop 2025-03-29 22:16:55 +01:00
Anthony Berg
5a27445de8 fix: deprecated modules on LUMI
(cherry picked from commit 277a6b4a3c)
2025-03-26 14:56:55 +01:00
Anthony Berg
cd69f69080 feat: add tqdm progress bar 2025-03-18 07:41:52 +01:00
Anthony Berg
9761ff4924 fix: tuples not being assigned 2025-03-18 07:41:38 +01:00
Anthony Berg
5931cee93f build: update deps 2025-03-18 07:41:19 +01:00
Anthony Berg
208d82ab0b feat: add .gitignore 2025-03-17 14:48:46 +01:00
Hicham Agueny
31bf80c6f0 Update README.md: install rocm with easybuild 2024-09-24 14:49:23 +02:00
Hicham Agueny
4df5e5853f Merge pull request #1 from HichamAgueny/implement-hip-python
Implement hip python
2024-06-09 23:26:35 +02:00
8 changed files with 387 additions and 95 deletions

276
.gitignore vendored Normal file
View File

@@ -0,0 +1,276 @@
.vscode/settings.json
/data
# Numpy Zipped
*.npz
# NetCDF
*.nc
# Python Related files
# Taken from: https://github.com/github/gitignore/blob/main/Python.gitignore
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# CUDA
cuda_cache/
# Taken from: https://github.com/github/gitignore/blob/main/CUDA.gitignore
*.i
*.ii
*.gpu
*.ptx
*.cubin
*.fatbin
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# Taken from: https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

View File

@@ -35,6 +35,8 @@ import gc
import netCDF4 import netCDF4
import json import json
from tqdm import tqdm
#import pycuda.compiler as cuda_compiler #import pycuda.compiler as cuda_compiler
#import pycuda.gpuarray #import pycuda.gpuarray
#import pycuda.driver as cuda #import pycuda.driver as cuda
@@ -178,11 +180,11 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
profiling_data_sim_runner["end"]["t_sim_init"] = time.time() profiling_data_sim_runner["end"]["t_sim_init"] = time.time()
#Start simulation loop #Start simulation loop
progress_printer = ProgressPrinter(save_times[-1], print_every=10) # progress_printer = ProgressPrinter(save_times[-1], print_every=10)
for k in range(len(save_times)): for k, t_step in tqdm(enumerate(t_steps), desc="Simulation Loop"):
#Get target time and step size there #Get target time and step size there
t_step = t_steps[k] # t_step = t_steps[k]
t_end = save_times[k] # t_end = save_times[k]
#Sanity check simulator #Sanity check simulator
try: try:
@@ -194,7 +196,7 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
profiling_data_sim_runner["start"]["t_full_step"] += time.time() profiling_data_sim_runner["start"]["t_full_step"] += time.time()
#Simulate #Simulate
if (t_step > 0.0): if t_step > 0.0:
sim.simulate(t_step, dt) sim.simulate(t_step, dt)
profiling_data_sim_runner["end"]["t_full_step"] += time.time() profiling_data_sim_runner["end"]["t_full_step"] += time.time()
@@ -211,11 +213,11 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
profiling_data_sim_runner["end"]["t_nc_write"] += time.time() profiling_data_sim_runner["end"]["t_nc_write"] += time.time()
#Write progress to screen #Write progress to screen
print_string = progress_printer.getPrintString(t_end) # print_string = progress_printer.getPrintString(t_end)
if (print_string): # if (print_string):
logger.debug(print_string) # logger.debug(print_string)
logger.debug("Simulated to t={:f} in {:d} timesteps (average dt={:f})".format(t_end, sim.simSteps(), sim.simTime() / sim.simSteps())) logger.debug("Simulated to t={:f} in {:d} timesteps (average dt={:f})".format(save_times[-1], sim.simSteps(), sim.simTime() / sim.simSteps()))
return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi
#return outdata.filename #return outdata.filename
@@ -306,7 +308,7 @@ class IPEngine(object):
import ipyparallel import ipyparallel
self.cluster = ipyparallel.Client()#profile='mpi') self.cluster = ipyparallel.Client()#profile='mpi')
time.sleep(3) time.sleep(3)
while(len(self.cluster.ids) != n_engines): while len(self.cluster.ids) != n_engines:
time.sleep(0.5) time.sleep(0.5)
self.logger.info("Waiting for cluster...") self.logger.info("Waiting for cluster...")
self.cluster = ipyparallel.Client()#profile='mpi') self.cluster = ipyparallel.Client()#profile='mpi')
@@ -433,58 +435,58 @@ class DataDumper(object):
class ProgressPrinter(object): # class ProgressPrinter(object):
""" # """
Small helper class for # Small helper class for
""" # """
def __init__(self, total_steps, print_every=5): # def __init__(self, total_steps, print_every=5):
self.logger = logging.getLogger(__name__) # self.logger = logging.getLogger(__name__)
self.start = time.time() # self.start = time.time()
self.total_steps = total_steps # self.total_steps = total_steps
self.print_every = print_every # self.print_every = print_every
self.next_print_time = self.print_every # self.next_print_time = self.print_every
self.last_step = 0 # self.last_step = 0
self.secs_per_iter = None # self.secs_per_iter = None
def getPrintString(self, step): # def getPrintString(self, step):
elapsed = time.time() - self.start # elapsed = time.time() - self.start
if (elapsed > self.next_print_time): # if (elapsed > self.next_print_time):
dt = elapsed - (self.next_print_time - self.print_every) # dt = elapsed - (self.next_print_time - self.print_every)
dsteps = step - self.last_step # dsteps = step - self.last_step
steps_remaining = self.total_steps - step # steps_remaining = self.total_steps - step
if (dsteps == 0): # if (dsteps == 0):
return # return
self.last_step = step # self.last_step = step
self.next_print_time = elapsed + self.print_every # self.next_print_time = elapsed + self.print_every
if not self.secs_per_iter: # if not self.secs_per_iter:
self.secs_per_iter = dt / dsteps # self.secs_per_iter = dt / dsteps
self.secs_per_iter = 0.2*self.secs_per_iter + 0.8*(dt / dsteps) # self.secs_per_iter = 0.2*self.secs_per_iter + 0.8*(dt / dsteps)
remaining_time = steps_remaining * self.secs_per_iter # remaining_time = steps_remaining * self.secs_per_iter
return "{:s}. Total: {:s}, elapsed: {:s}, remaining: {:s}".format( # return "{:s}. Total: {:s}, elapsed: {:s}, remaining: {:s}".format(
ProgressPrinter.progressBar(step, self.total_steps), # ProgressPrinter.progressBar(step, self.total_steps),
ProgressPrinter.timeString(elapsed + remaining_time), # ProgressPrinter.timeString(elapsed + remaining_time),
ProgressPrinter.timeString(elapsed), # ProgressPrinter.timeString(elapsed),
ProgressPrinter.timeString(remaining_time)) # ProgressPrinter.timeString(remaining_time))
def timeString(seconds): # def timeString(seconds):
seconds = int(max(seconds, 1)) # seconds = int(max(seconds, 1))
minutes, seconds = divmod(seconds, 60) # minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60) # hours, minutes = divmod(minutes, 60)
periods = [('h', hours), ('m', minutes), ('s', seconds)] # periods = [('h', hours), ('m', minutes), ('s', seconds)]
time_string = ' '.join('{}{}'.format(value, name) # time_string = ' '.join('{}{}'.format(value, name)
for name, value in periods # for name, value in periods
if value) # if value)
return time_string # return time_string
def progressBar(step, total_steps, width=30): # def progressBar(step, total_steps, width=30):
progress = np.round(width * step / total_steps).astype(np.int32) # progress = np.round(width * step / total_steps).astype(np.int32)
progressbar = "0% [" + "#"*(progress) + "="*(width-progress) + "] 100%" # progressbar = "0% [" + "#"*(progress) + "="*(width-progress) + "] 100%"
return progressbar # return progressbar
""" """

View File

@@ -25,6 +25,7 @@ import numpy as np
import math import math
import logging import logging
from enum import IntEnum from enum import IntEnum
from tqdm import tqdm
#import pycuda.compiler as cuda_compiler #import pycuda.compiler as cuda_compiler
#import pycuda.gpuarray #import pycuda.gpuarray
@@ -156,7 +157,7 @@ class BaseSimulator(object):
self.num_substeps = num_substeps self.num_substeps = num_substeps
#Handle autotuning block size #Handle autotuning block size
if (self.context.autotuner): if self.context.autotuner:
peak_configuration = self.context.autotuner.get_peak_performance(self.__class__) peak_configuration = self.context.autotuner.get_peak_performance(self.__class__)
block_width = int(peak_configuration["block_width"]) block_width = int(peak_configuration["block_width"])
block_height = int(peak_configuration["block_height"]) block_height = int(peak_configuration["block_height"])
@@ -195,27 +196,28 @@ class BaseSimulator(object):
Requires that the step() function is implemented in the subclasses Requires that the step() function is implemented in the subclasses
""" """
printer = Common.ProgressPrinter(t) # printer = Common.ProgressPrinter(t)
t_start = self.simTime() t_start = self.simTime()
t_end = t_start + t t_end = t_start + t
update_dt = True update_dt = True
if (dt is not None): if dt is not None:
update_dt = False update_dt = False
self.dt = dt self.dt = dt
while(self.simTime() < t_end): for _ in tqdm(range(math.ceil((t_end - t_start) / self.dt)), desc="Simulation"):
# Update dt every 100 timesteps and cross your fingers it works # Update dt every 100 timesteps and cross your fingers it works
# for the next 100 # for the next 100
if (update_dt and (self.simSteps() % 100 == 0)): # TODO this is probably broken now after fixing the "infinite" loop
if update_dt and (self.simSteps() % 100 == 0):
self.dt = self.computeDt()*self.cfl_scale self.dt = self.computeDt()*self.cfl_scale
# Compute timestep for "this" iteration (i.e., shorten last timestep) # Compute timestep for "this" iteration (i.e., shorten last timestep)
current_dt = np.float32(min(self.dt, t_end-self.simTime())) current_dt = np.float32(min(self.dt, t_end-self.simTime()))
# Stop if end reached (should not happen) # Stop if end reached (should not happen)
if (current_dt <= 0.0): if current_dt <= 0.0:
self.logger.warning("Timestep size {:d} is less than or equal to zero!".format(self.simSteps())) self.logger.warning("Timestep size {:d} is less than or equal to zero!".format(self.simSteps()))
break break
@@ -223,14 +225,16 @@ class BaseSimulator(object):
self.step(current_dt) self.step(current_dt)
#Print info #Print info
print_string = printer.getPrintString(self.simTime() - t_start) # print_string = printer.getPrintString(self.simTime() - t_start)
if (print_string): # if (print_string):
self.logger.info("%s: %s", self, print_string) # self.logger.info("%s: %s", self, print_string)
try: # try:
self.check() # self.check()
except AssertionError as e: # except AssertionError as e:
e.args += ("Step={:d}, time={:f}".format(self.simSteps(), self.simTime()),) # e.args += ("Step={:d}, time={:f}".format(self.simSteps(), self.simTime()),)
raise # raise
print("Done")
def step(self, dt): def step(self, dt):

View File

@@ -19,18 +19,18 @@ Mydir=/project/project_4650000xx
Myapplication=${Mydir}/FiniteVolumeGPU_hip/mpiTesting.py Myapplication=${Mydir}/FiniteVolumeGPU_hip/mpiTesting.py
#modules #modules
ml LUMI/23.03 partition/G ml LUMI/24.03 partition/G
ml lumi-container-wrapper ml lumi-container-wrapper
ml cray-python/3.9.13.1 ml cray-python/3.11.7
ml rocm/5.2.3 ml rocm/6.2.2
ml craype-accel-amd-gfx90a ml craype-accel-amd-gfx90a
ml cray-mpich/8.1.27 ml cray-mpich/8.1.29
export PATH="/project/project_4650000xx/FiniteVolumeGPU_hip/MyCondaEnv/bin:$PATH" export PATH="/project/project_4650000xx/FiniteVolumeGPU_hip/MyCondaEnv/bin:$PATH"
#missing library #missing library
export LD_LIBRARY_PATH=/opt/cray/pe/mpich/8.1.27/ofi/cray/14.0/lib-abi-mpich:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/opt/cray/pe/mpich/8.1.29/ofi/cray/17.0/lib-abi-mpich:$LD_LIBRARY_PATH
#Binding mask #Binding mask
bind_mask="0x${fe}000000000000,0x${fe}00000000000000,0x${fe}0000,0x${fe}000000,0x${fe},0x${fe}00,0x${fe}00000000,0x${fe}0000000000" bind_mask="0x${fe}000000000000,0x${fe}00000000000000,0x${fe}0000,0x${fe}000000,0x${fe},0x${fe}00,0x${fe}00000000,0x${fe}0000000000"

View File

@@ -1,25 +1,31 @@
# FiniteVolumeGPU # FiniteVolumeGPU
This is a HIP version of the [FiniteVolume code](https://github.com/babrodtk/FiniteVolumeGPU) (work in progress). It is a Python software package that implements several finite volume discretizations on Cartesian grids for the shallow water equations and the Euler equations. This is a HIP version of the [FiniteVolume code](https://github.com/babrodtk/FiniteVolumeGPU). It is a Python software package that implements several finite volume discretizations on Cartesian grids for the shallow water equations and the Euler equations.
## Setup on LUMI-G ## Setup on LUMI-G
Here is a step-by-step guide on installing packages on LUMI-G Here is a step-by-step guide on installing packages on LUMI-G
### Step 0: load modules ### Step 1: Install rocm-5.4.6 with Easybuild
``` ```
ml LUMI/23.03 partition/G export EBU_USER_PREFIX=/project/project_xxxxxx/EasyBuild
ml lumi-container-wrapper ml LUMI/24.03 partition/G
ml cray-python/3.9.13.1 ml EasyBuild-user
export PYTHONIOENCODING=utf-8
eb rocm-5.4.6.eb -r
``` ```
### Step 1: run conda-container ### Step 2: run conda-container
Installation via conda can be done as: Installation via conda can be done as:
``` ```
ml LUMI/24.03 partition/G
ml lumi-container-wrapper/0.3.3-cray-python-3.11.7
```
```
conda-containerize new --prefix MyCondaEnv conda_environment_lumi.yml conda-containerize new --prefix MyCondaEnv conda_environment_lumi.yml
``` ```
where the file `conda_environment_lumi.yml` contains packages to be installed. where the file `conda_environment_lumi.yml` contains packages to be installed.
### Step 2: Set the env. variable to search for binaries ### Step 3: Set the env. variable to search for binaries
``` ```
export the bin path: export PATH="$PWD/MyCondaEnv/bin:$PATH" export the bin path: export PATH="$PWD/MyCondaEnv/bin:$PATH"
``` ```

View File

@@ -5,15 +5,17 @@ channels:
- conda-forge - conda-forge
dependencies: dependencies:
- python=3.9.13 - python=3.11.7
- pip
- numpy - numpy
- mpi4py - mpi4py
- six - six
- pytools - pytools
- netcdf4 - netcdf4
- scipy - scipy
- tqdm
- pip: - pip:
- hip-python==5.4.3.470.16 - hip-python==6.2.0.499.16
- -i https://test.pypi.org/simple/ - -i https://test.pypi.org/simple/

View File

@@ -70,7 +70,7 @@ def hip_check(call_result):
args = parser.parse_args() args = parser.parse_args()
if(args.profile): if args.profile:
profiling_data = {} profiling_data = {}
# profiling: total run time # profiling: total run time
t_total_start = time.time() t_total_start = time.time()
@@ -79,6 +79,8 @@ if(args.profile):
# Get MPI COMM to use # Get MPI COMM to use
comm = MPI.COMM_WORLD comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()
#### ####
@@ -86,7 +88,7 @@ comm = MPI.COMM_WORLD
#### ####
log_level_console = 20 log_level_console = 20
log_level_file = 10 log_level_file = 10
log_filename = 'mpi_' + str(comm.rank) + '.log' log_filename = 'mpi_' + str(rank) + '.log'
logger = logging.getLogger('GPUSimulators') logger = logging.getLogger('GPUSimulators')
logger.setLevel(min(log_level_console, log_level_file)) logger.setLevel(min(log_level_console, log_level_file))
@@ -110,7 +112,7 @@ logger.info("File logger using level %s to %s",
# Initialize MPI grid etc # Initialize MPI grid etc
#### ####
logger.info("Creating MPI grid") logger.info("Creating MPI grid")
grid = MPISimulator.MPIGrid(MPI.COMM_WORLD) grid = MPISimulator.MPIGrid(comm)
""" """
job_id = int(os.environ["SLURM_JOB_ID"]) job_id = int(os.environ["SLURM_JOB_ID"])
@@ -152,7 +154,7 @@ gamma = 1.4
#save_times = np.linspace(0, 0.000099, 11) #save_times = np.linspace(0, 0.000099, 11)
#save_times = np.linspace(0, 0.000099, 2) #save_times = np.linspace(0, 0.000099, 2)
save_times = np.linspace(0, 0.0000999, 2) save_times = np.linspace(0, 0.0000999, 2)
outfile = "mpi_out_" + str(MPI.COMM_WORLD.rank) + ".nc" outfile = "mpi_out_" + str(rank) + ".nc"
save_var_names = ['rho', 'rho_u', 'rho_v', 'E'] save_var_names = ['rho', 'rho_u', 'rho_v', 'E']
arguments = IC.genKelvinHelmholtz(nx, ny, gamma, grid=grid) arguments = IC.genKelvinHelmholtz(nx, ny, gamma, grid=grid)
@@ -160,7 +162,7 @@ arguments['context'] = cuda_context
arguments['theta'] = 1.2 arguments['theta'] = 1.2
arguments['grid'] = grid arguments['grid'] = grid
if(args.profile): if args.profile:
t_init_end = time.time() t_init_end = time.time()
t_init = t_init_end - t_init_start t_init = t_init_end - t_init_start
profiling_data["t_init"] = t_init profiling_data["t_init"] = t_init
@@ -178,17 +180,17 @@ def genSim(grid, **kwargs):
return sim return sim
outfile, sim_runner_profiling_data, sim_profiling_data = Common.runSimulation( (outfile, sim_runner_profiling_data, sim_profiling_data) = Common.runSimulation(
genSim, arguments, outfile, save_times, save_var_names, dt) genSim, arguments, outfile, save_times, save_var_names, dt)
if(args.profile): if args.profile:
t_total_end = time.time() t_total_end = time.time()
t_total = t_total_end - t_total_start t_total = t_total_end - t_total_start
profiling_data["t_total"] = t_total profiling_data["t_total"] = t_total
print("Total run time on rank " + str(MPI.COMM_WORLD.rank) + " is " + str(t_total) + " s") print("Total run time on rank " + str(rank) + " is " + str(t_total) + " s")
# write profiling to json file # write profiling to json file
if(args.profile and MPI.COMM_WORLD.rank == 0): if args.profile and rank == 0:
job_id = "" job_id = ""
if "SLURM_JOB_ID" in os.environ: if "SLURM_JOB_ID" in os.environ:
job_id = int(os.environ["SLURM_JOB_ID"]) job_id = int(os.environ["SLURM_JOB_ID"])
@@ -199,7 +201,7 @@ if(args.profile and MPI.COMM_WORLD.rank == 0):
str(job_id) + "_" + str(allocated_nodes) + "_nodes_and_" + str(allocated_gpus) + "_GPUs_profiling.json" str(job_id) + "_" + str(allocated_nodes) + "_nodes_and_" + str(allocated_gpus) + "_GPUs_profiling.json"
profiling_data["outfile"] = outfile profiling_data["outfile"] = outfile
else: else:
profiling_file = "MPI_" + str(MPI.COMM_WORLD.size) + "_procs_and_" + str(num_cuda_devices) + "_GPUs_profiling.json" profiling_file = "MPI_" + str(size) + "_procs_and_" + str(num_cuda_devices) + "_GPUs_profiling.json"
for stage in sim_runner_profiling_data["start"].keys(): for stage in sim_runner_profiling_data["start"].keys():
profiling_data[stage] = sim_runner_profiling_data["end"][stage] - sim_runner_profiling_data["start"][stage] profiling_data[stage] = sim_runner_profiling_data["end"][stage] - sim_runner_profiling_data["start"][stage]
@@ -214,7 +216,7 @@ if(args.profile and MPI.COMM_WORLD.rank == 0):
profiling_data["slurm_job_id"] = job_id profiling_data["slurm_job_id"] = job_id
profiling_data["n_cuda_devices"] = str(num_cuda_devices) profiling_data["n_cuda_devices"] = str(num_cuda_devices)
profiling_data["n_processes"] = str(MPI.COMM_WORLD.size) profiling_data["n_processes"] = str(size)
profiling_data["git_hash"] = Common.getGitHash() profiling_data["git_hash"] = Common.getGitHash()
profiling_data["git_status"] = Common.getGitStatus() profiling_data["git_status"] = Common.getGitStatus()