diff --git a/GPUSimulators/CudaContext.py b/GPUSimulators/CudaContext.py index 243469f..6c90636 100644 --- a/GPUSimulators/CudaContext.py +++ b/GPUSimulators/CudaContext.py @@ -44,8 +44,12 @@ Class which keeps track of the CUDA context and some helper functions """ class CudaContext(object): - def __init__(self, blocking=False, use_cache=True, autotuning=True): - self.blocking = blocking + def __init__(self, device=None, context_flags=None, use_cache=True, autotuning=True): + """ + Create a new CUDA context + Set device to an id or pci_bus_id to select a specific GPU + Set context_flags to cuda.ctx_flags.SCHED_BLOCKING_SYNC for a blocking context + """ self.use_cache = use_cache self.logger = logging.getLogger(__name__) self.modules = {} @@ -60,17 +64,19 @@ class CudaContext(object): #Print some info about CUDA self.logger.info("CUDA version %s", str(cuda.get_version())) self.logger.info("Driver version %s", str(cuda.get_driver_version())) - - self.cuda_device = cuda.Device(0) - self.logger.info("Using '%s' GPU", self.cuda_device.name()) + + if device is None: + device = 0 + + self.cuda_device = cuda.Device(device) + self.logger.info("Using device %d/%d '%s' (%s) GPU", device, cuda.Device.count(), self.cuda_device.name(), self.cuda_device.pci_bus_id()) self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability())) # Create the CUDA context - if (self.blocking): - self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_BLOCKING_SYNC) - self.logger.warning("Using blocking context") - else: - self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO) + if context_flags is None: + context_flags=cuda.ctx_flags.SCHED_AUTO + + self.cuda_context = self.cuda_device.make_context(flags=context_flags) free, total = cuda.mem_get_info() self.logger.debug(" => memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024))) diff --git a/GPUSimulators/MPISimulator.py 
b/GPUSimulators/MPISimulator.py index fb0093c..e0aa6a7 100644 --- a/GPUSimulators/MPISimulator.py +++ b/GPUSimulators/MPISimulator.py @@ -143,6 +143,55 @@ class MPIGrid(object): out_data = np.empty([self.comm.size] + list(data.shape), dtype=data.dtype) self.comm.Gather(data, out_data, root) return out_data + + def getLocalRank(self): + """ + Returns the local rank on this node for this MPI process + """ + + # This function has been adapted from + # https://github.com/SheffieldML/PyDeepGP/blob/master/deepgp/util/parallel.py + # by Zhenwen Dai released under BSD 3-Clause "New" or "Revised" License: + # + # Copyright (c) 2016, Zhenwen Dai + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # * Redistributions of source code must retain the above copyright notice, this + # list of conditions and the following disclaimer. + # + # * Redistributions in binary form must reproduce the above copyright notice, + # this list of conditions and the following disclaimer in the documentation + # and/or other materials provided with the distribution. + # + # * Neither the name of DGP nor the names of its + # contributors may be used to endorse or promote products derived from + # this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + #Get this rank's unique (physical) node name + node_name = MPI.Get_processor_name() + + #Gather the list of all node names on all nodes + node_names = self.comm.allgather(node_name) + + #Loop over all node names up until our rank + #and count how many duplicates of our nodename we find + local_rank = len([0 for name in node_names[:self.comm.rank] if name==node_name]) + + return local_rank class MPISimulator(Simulator.BaseSimulator): @@ -233,9 +282,7 @@ class MPISimulator(Simulator.BaseSimulator): self.out_n = np.empty_like(self.in_n) self.out_s = np.empty_like(self.in_s) - self.logger.debug("Simlator rank {:d} has neighbors {:s}".format(self.grid.comm.rank, str([self.north, self.south, self.east, self.west]))) - - self.logger.debug("Simlator rank {:d} initialized ".format(self.grid.comm.rank)) + self.logger.debug("Simlator rank {:d} initialized on {:s}".format(self.grid.comm.rank, MPI.Get_processor_name())) def substep(self, dt, step_number): diff --git a/mpiTesting.py b/mpiTesting.py new file mode 100644 index 0000000..52deeaa --- /dev/null +++ b/mpiTesting.py @@ -0,0 +1,137 @@ +# -*- coding: utf-8 -*- + +""" +This python module implements MPI simulations for benchmarking + +Copyright (C) 2018 SINTEF ICT + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any 
later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +""" + + +import numpy as np +import gc +import time +import json +import logging + +#MPI +from mpi4py import MPI + +#CUDA +import pycuda.driver as cuda + +#Simulator engine etc +from GPUSimulators import MPISimulator, Common, CudaContext +from GPUSimulators import EE2D_KP07_dimsplit +from GPUSimulators.helpers import InitialConditions as IC +from GPUSimulators.Simulator import BoundaryCondition as BC + + +#Get MPI COMM to use +comm = MPI.COMM_WORLD + + +#### +#Initialize logging +#### +log_level_console = 20 +log_level_file = 10 +log_filename = 'mpi_' + str(comm.rank) + '.log' +logger = logging.getLogger('GPUSimulators') +logger.setLevel(min(log_level_console, log_level_file)) + +ch = logging.StreamHandler() +ch.setLevel(log_level_console) +logger.addHandler(ch) +logger.info("Console logger using level %s", logging.getLevelName(log_level_console)) + +fh = logging.FileHandler(log_filename) +formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s: %(message)s') +fh.setFormatter(formatter) +fh.setLevel(log_level_file) +logger.addHandler(fh) +logger.info("File logger using level %s to %s", logging.getLevelName(log_level_file), log_filename) + + + +#### +# Initialize MPI grid etc +#### +logger.info("Creating MPI grid") +grid = MPISimulator.MPIGrid(MPI.COMM_WORLD) + + + +#### +# Initialize CUDA +#### +cuda.init(flags=0) +logger.info("Initializing CUDA") +local_rank = grid.getLocalRank() +num_cuda_devices = cuda.Device.count() +cuda_device = local_rank % num_cuda_devices +cuda_context = CudaContext.CudaContext(device=cuda_device, autotuning=False) + + + +#### +# Set initial conditions +#### 
+logger.info("Generating initial conditions") +nx = 128 +ny = 128 +gamma = 1.4 +save_times = np.linspace(0, 5.0, 10) +outfile = "mpi_out_" + str(MPI.COMM_WORLD.rank) + ".nc" +save_var_names = ['rho', 'rho_u', 'rho_v', 'E'] + +arguments = IC.genKelvinHelmholtz(nx, ny, gamma, grid=grid) +arguments['context'] = cuda_context +arguments['theta'] = 1.2 +arguments['grid'] = grid + + + + +#### +# Run simulation +#### +logger.info("Running simulation") +#Helper function to create MPI simulator +def genSim(grid, **kwargs): + local_sim = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs) + sim = MPISimulator.MPISimulator(local_sim, grid) + return sim +outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names) + + + +#### +# Clean shutdown +#### +sim = None +local_sim = None +cuda_context = None +arguments = None +logging.shutdown() +gc.collect() + + + +#### +# Print completion and exit +#### +print("Completed!") +exit(0) \ No newline at end of file