# -*- coding: utf-8 -*-
"""
This python module implements MPI simulations for benchmarking

Copyright (C) 2018 SINTEF ICT

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

import numpy as np
import gc
import time
import json
import logging
import os

# GPU-aware MPI (disabled): preload the Cray MPICH GTL library when GPU support is enabled
"""
from os import environ
if environ.get("MPICH_GPU_SUPPORT_ENABLED", False):
    from ctypes import CDLL, RTLD_GLOBAL
    CDLL(f"{environ.get('CRAY_MPICH_ROOTDIR')}/gtl/lib/libmpi_gtl_hsa.so", mode=RTLD_GLOBAL)
"""

# MPI
from mpi4py import MPI

# CUDA
#import pycuda.driver as cuda
from hip import hip, hiprtc

# Simulator engine etc
from GPUSimulators import MPISimulator, Common, CudaContext
from GPUSimulators import EE2D_KP07_dimsplit
from GPUSimulators.helpers import InitialConditions as IC
from GPUSimulators.Simulator import BoundaryCondition as BC

import argparse

parser = argparse.ArgumentParser(description='Strong and weak scaling experiments.')
parser.add_argument('-nx', type=int, default=128)
parser.add_argument('-ny', type=int, default=128)
parser.add_argument('--profile', action='store_true')  # default: False


def hip_check(call_result):
    """Unpack a HIP/hiprtc call result and raise on error."""
    err = call_result[0]
    result = call_result[1:]
    if len(result) == 1:
        result = result[0]
    if isinstance(err, hip.hipError_t) and err != hip.hipError_t.hipSuccess:
        raise RuntimeError(str(err))
    elif (
        isinstance(err, hiprtc.hiprtcResult)
        and err != hiprtc.hiprtcResult.HIPRTC_SUCCESS
    ):
        raise RuntimeError(str(err))
    return result


args = parser.parse_args()

if args.profile:
    profiling_data = {}
    # profiling: total run time
    t_total_start = time.time()
    t_init_start = time.time()


# Get MPI COMM to use
comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()


####
# Initialize logging
####
log_level_console = 20
log_level_file = 10
log_filename = 'mpi_' + str(rank) + '.log'

logger = logging.getLogger('GPUSimulators')
logger.setLevel(min(log_level_console, log_level_file))

ch = logging.StreamHandler()
ch.setLevel(log_level_console)
logger.addHandler(ch)
logger.info("Console logger using level %s",
            logging.getLevelName(log_level_console))

fh = logging.FileHandler(log_filename)
formatter = logging.Formatter(
    '%(asctime)s:%(name)s:%(levelname)s: %(message)s')
fh.setFormatter(formatter)
fh.setLevel(log_level_file)
logger.addHandler(fh)
logger.info("File logger using level %s to %s",
            logging.getLevelName(log_level_file), log_filename)


####
# Initialize MPI grid etc
####
logger.info("Creating MPI grid")
grid = MPISimulator.MPIGrid(comm)

"""
job_id = int(os.environ["SLURM_JOB_ID"])
allocated_nodes = int(os.environ["SLURM_JOB_NUM_NODES"])
allocated_gpus = int(os.environ["ROCR_VISIBLE_DEVICES"].count(",") + 1)
print("job_id:", job_id)
print("allocated_nodes", allocated_nodes)
print("allocated_gpus", allocated_gpus)
"""


####
# Initialize CUDA
####
#cuda.init(flags=0)
#logger.info("Initializing CUDA")
local_rank = grid.getLocalRank()
#num_cuda_devices = cuda.Device.count()
num_cuda_devices = hip_check(hip.hipGetDeviceCount())
cuda_device = local_rank % num_cuda_devices
logger.info("Process %s using CUDA device %s", str(local_rank), str(cuda_device))
cuda_context = CudaContext.CudaContext(device=cuda_device, autotuning=False)


####
# Set initial conditions
####

# DEBUGGING - setting random seed
np.random.seed(42)

logger.info("Generating initial conditions")
nx = args.nx
ny = args.ny
dt = 0.000001
gamma = 1.4
#save_times = np.linspace(0, 0.000009, 2)
#save_times = np.linspace(0, 0.000099, 11)
#save_times = np.linspace(0, 0.000099, 2)
save_times = np.linspace(0, 0.0000999, 2)
outfile = "mpi_out_" + str(rank) + ".nc"
save_var_names = ['rho', 'rho_u', 'rho_v', 'E']

arguments = IC.genKelvinHelmholtz(nx, ny, gamma, grid=grid)
arguments['context'] = cuda_context
arguments['theta'] = 1.2
arguments['grid'] = grid

if args.profile:
    t_init_end = time.time()
    t_init = t_init_end - t_init_start
    profiling_data["t_init"] = t_init


####
# Run simulation
####
logger.info("Running simulation")


# Helper function to create MPI simulator
def genSim(grid, **kwargs):
    local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)
    sim = MPISimulator.MPISimulator(local_sim, grid)
    return sim


(outfile, sim_runner_profiling_data, sim_profiling_data) = Common.runSimulation(
    genSim, arguments, outfile, save_times, save_var_names, dt)

if args.profile:
    t_total_end = time.time()
    t_total = t_total_end - t_total_start
    profiling_data["t_total"] = t_total
    print("Total run time on rank " + str(rank) + " is " + str(t_total) + " s")

# Write profiling data to a JSON file (rank 0 only)
if args.profile and rank == 0:
    job_id = ""
    if "SLURM_JOB_ID" in os.environ:
        job_id = int(os.environ["SLURM_JOB_ID"])
        allocated_nodes = int(os.environ["SLURM_JOB_NUM_NODES"])
        allocated_gpus = int(os.environ["ROCR_VISIBLE_DEVICES"].count(",") + 1)
        # allocated_gpus = int(os.environ["CUDA_VISIBLE_DEVICES"].count(",") + 1)
        profiling_file = "MPI_jobid_" + \
            str(job_id) + "_" + str(allocated_nodes) + "_nodes_and_" + \
            str(allocated_gpus) + "_GPUs_profiling.json"
        profiling_data["outfile"] = outfile
    else:
        profiling_file = "MPI_" + str(size) + "_procs_and_" + \
            str(num_cuda_devices) + "_GPUs_profiling.json"

    for stage in sim_runner_profiling_data["start"].keys():
        profiling_data[stage] = sim_runner_profiling_data["end"][stage] - \
            sim_runner_profiling_data["start"][stage]

    for stage in sim_profiling_data["start"].keys():
        profiling_data[stage] = sim_profiling_data["end"][stage] - \
            sim_profiling_data["start"][stage]

    profiling_data["nx"] = nx
    profiling_data["ny"] = ny
    profiling_data["dt"] = dt
    profiling_data["n_time_steps"] = sim_profiling_data["n_time_steps"]
    profiling_data["slurm_job_id"] = job_id
    profiling_data["n_cuda_devices"] = str(num_cuda_devices)
    profiling_data["n_processes"] = str(size)
    profiling_data["git_hash"] = Common.getGitHash()
    profiling_data["git_status"] = Common.getGitStatus()

    with open(profiling_file, "w") as write_file:
        json.dump(profiling_data, write_file)


####
# Clean shutdown
####
sim = None
local_sim = None
cuda_context = None
arguments = None
logging.shutdown()
gc.collect()


####
# Print completion and exit
####
print("Completed!")
exit(0)
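
# Example launch (illustrative only; the script filename, launcher, and rank
# count are assumptions that depend on the local setup):
#
#   mpirun -np 4 python mpiTesting.py -nx 1024 -ny 1024 --profile
#
# Each rank writes its own log file (mpi_<rank>.log) and output file
# (mpi_out_<rank>.nc); when --profile is given, rank 0 also writes a
# profiling summary as JSON.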