diff --git a/GPUSimulators/CudaContext.py b/GPUSimulators/CudaContext.py
index 243469f..6c90636 100644
--- a/GPUSimulators/CudaContext.py
+++ b/GPUSimulators/CudaContext.py
@@ -44,8 +44,12 @@ Class which keeps track of the CUDA context and some helper functions
 """
 class CudaContext(object):
     
-    def __init__(self, blocking=False, use_cache=True, autotuning=True):
-        self.blocking = blocking
+    def __init__(self, device=None, context_flags=None, use_cache=True, autotuning=True):
+        """
+        Create a new CUDA context
+        Set device to an id or pci_bus_id to select a specific GPU
+        Set context_flags to cuda.ctx_flags.SCHED_BLOCKING_SYNC for a blocking context
+        """
         self.use_cache = use_cache
         self.logger =  logging.getLogger(__name__)
         self.modules = {}
@@ -60,17 +64,19 @@ class CudaContext(object):
         #Print some info about CUDA
         self.logger.info("CUDA version %s", str(cuda.get_version()))
         self.logger.info("Driver version %s",  str(cuda.get_driver_version()))
-
-        self.cuda_device = cuda.Device(0)
-        self.logger.info("Using '%s' GPU", self.cuda_device.name())
+        
+        if device is None:
+            device = 0
+        self.cuda_device = cuda.Device(device)
+        # device may be an integer id or a pci_bus_id string, so it is formatted with %s rather than %d
+        self.logger.info("Using device %s/%d '%s' (%s) GPU", device, cuda.Device.count(), self.cuda_device.name(), self.cuda_device.pci_bus_id())
         self.logger.debug(" => compute capability: %s", str(self.cuda_device.compute_capability()))
 
         # Create the CUDA context
-        if (self.blocking):
-            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_BLOCKING_SYNC)
-            self.logger.warning("Using blocking context")
-        else:
-            self.cuda_context = self.cuda_device.make_context(flags=cuda.ctx_flags.SCHED_AUTO)
+        if context_flags is None:
+            context_flags=cuda.ctx_flags.SCHED_AUTO
+            
+        self.cuda_context = self.cuda_device.make_context(flags=context_flags)
             
         free, total = cuda.mem_get_info()
         self.logger.debug(" => memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
diff --git a/GPUSimulators/MPISimulator.py b/GPUSimulators/MPISimulator.py
index fb0093c..e0aa6a7 100644
--- a/GPUSimulators/MPISimulator.py
+++ b/GPUSimulators/MPISimulator.py
@@ -143,6 +143,55 @@ class MPIGrid(object):
             out_data = np.empty([self.comm.size] + list(data.shape), dtype=data.dtype)
         self.comm.Gather(data, out_data, root)
         return out_data
+        
+    def getLocalRank(self):
+        """
+        Returns the local rank on this node for this MPI process
+
+        The local rank is the number of lower-numbered ranks in self.comm
+        that report the same processor name as this process. Every rank in
+        self.comm has to call this method, since it performs an allgather.
+        """
+
+        # This function has been adapted from 
+        # https://github.com/SheffieldML/PyDeepGP/blob/master/deepgp/util/parallel.py
+        # by Zhenwen Dai released under BSD 3-Clause "New" or "Revised" License:
+        # 
+        # Copyright (c) 2016, Zhenwen Dai
+        # All rights reserved.
+        # 
+        # Redistribution and use in source and binary forms, with or without
+        # modification, are permitted provided that the following conditions are met:
+        # 
+        # * Redistributions of source code must retain the above copyright notice, this
+        #   list of conditions and the following disclaimer.
+        # 
+        # * Redistributions in binary form must reproduce the above copyright notice,
+        #   this list of conditions and the following disclaimer in the documentation
+        #   and/or other materials provided with the distribution.
+        # 
+        # * Neither the name of DGP nor the names of its
+        #   contributors may be used to endorse or promote products derived from
+        #   this software without specific prior written permission.
+        # 
+        # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+        # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+        # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+        # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+        # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+        # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+        # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+        # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+        # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+        # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+        # Processor name that MPI reports for the process calling this method
+        this_node = MPI.Get_processor_name()
+        # Collect the name reported by every rank (the slice below relies on rank order)
+        all_nodes = self.comm.allgather(this_node)
+        # Keep only the ranks below ours and count those sharing our name
+        lower_ranks = all_nodes[:self.comm.rank]
+        return lower_ranks.count(this_node)
 
 
 class MPISimulator(Simulator.BaseSimulator):
@@ -233,9 +282,7 @@ class MPISimulator(Simulator.BaseSimulator):
         self.out_n = np.empty_like(self.in_n)
         self.out_s = np.empty_like(self.in_s)
         
-        self.logger.debug("Simlator rank {:d} has neighbors {:s}".format(self.grid.comm.rank, str([self.north, self.south, self.east, self.west])))
-        
-        self.logger.debug("Simlator rank {:d} initialized ".format(self.grid.comm.rank))
+        self.logger.debug("Simlator rank {:d} initialized on {:s}".format(self.grid.comm.rank, MPI.Get_processor_name()))
     
         
     def substep(self, dt, step_number):
diff --git a/mpiTesting.py b/mpiTesting.py
new file mode 100644
index 0000000..52deeaa
--- /dev/null
+++ b/mpiTesting.py
@@ -0,0 +1,137 @@
+# -*- coding: utf-8 -*-
+
+"""
+This python module implements MPI simulations for benchmarking
+
+Copyright (C) 2018  SINTEF ICT
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+
+import numpy as np
+import gc
+import time
+import json
+import logging
+
+#MPI
+from mpi4py import MPI
+
+#CUDA
+import pycuda.driver as cuda
+
+#Simulator engine etc
+from GPUSimulators import MPISimulator, Common, CudaContext
+from GPUSimulators import EE2D_KP07_dimsplit
+from GPUSimulators.helpers import InitialConditions as IC
+from GPUSimulators.Simulator import BoundaryCondition as BC
+
+
+# All ranks in this script communicate over the world communicator
+comm = MPI.COMM_WORLD
+
+
+####
+# Initialize logging: INFO and above to the console, DEBUG and above to a per-rank file
+####
+log_level_console = logging.INFO
+log_level_file = logging.DEBUG
+log_filename = 'mpi_{}.log'.format(comm.rank)
+logger = logging.getLogger('GPUSimulators')
+# The logger itself has to let through everything that either handler wants to see
+logger.setLevel(min(log_level_console, log_level_file))
+
+console_handler = logging.StreamHandler()
+console_handler.setLevel(log_level_console)
+logger.addHandler(console_handler)
+logger.info("Console logger using level %s", logging.getLevelName(log_level_console))
+
+file_handler = logging.FileHandler(log_filename)
+file_handler.setLevel(log_level_file)
+file_handler.setFormatter(logging.Formatter('%(asctime)s:%(name)s:%(levelname)s: %(message)s'))
+logger.addHandler(file_handler)
+logger.info("File logger using level %s to %s", logging.getLevelName(log_level_file), log_filename)
+
+
+
+####
+# Initialize MPI grid etc
+####
+logger.info("Creating MPI grid")
+grid = MPISimulator.MPIGrid(MPI.COMM_WORLD)  # same communicator object as comm above
+
+
+
+####
+# Initialize CUDA: one context per rank, GPU chosen from the node-local rank
+####
+cuda.init(flags=0)
+logger.info("Initializing CUDA")
+local_rank = grid.getLocalRank()  # this rank's index among the ranks on the same node
+num_cuda_devices = cuda.Device.count()
+cuda_device = local_rank % num_cuda_devices  # wraps around: ranks share GPUs if they outnumber the devices
+cuda_context = CudaContext.CudaContext(device=cuda_device, autotuning=False)
+
+
+
+####
+# Set initial conditions
+####
+logger.info("Generating initial conditions")
+# Problem size forwarded to the initial condition generator
+nx, ny = 128, 128
+gamma = 1.4
+save_times = np.linspace(0, 5.0, 10)
+outfile = "mpi_out_{}.nc".format(comm.rank)
+save_var_names = ['rho', 'rho_u', 'rho_v', 'E']
+
+arguments = IC.genKelvinHelmholtz(nx, ny, gamma, grid=grid)
+arguments['context'] = cuda_context
+arguments['theta'] = 1.2
+arguments['grid'] = grid
+
+
+    
+    
+####
+# Run simulation
+####
+logger.info("Running simulation")
+def genSim(grid, **kwargs):
+    """Build the local EE2D_KP07_dimsplit simulator from kwargs and wrap it in an MPISimulator on grid"""
+    local_part = EE2D_KP07_dimsplit.EE2D_KP07_dimsplit(**kwargs)
+    return MPISimulator.MPISimulator(local_part, grid)
+# genSim is handed over as a factory; runSimulation decides when and how to call it
+outfile = Common.runSimulation(genSim, arguments, outfile, save_times, save_var_names)
+
+
+
+####
+# Clean shutdown
+####
+# Drop the module-level references to the CUDA context and the simulation arguments
+# (sim and local_sim only ever existed as locals inside genSim, so there is nothing to clear for them)
+cuda_context = None
+arguments = None
+logging.shutdown()
+gc.collect()
+
+
+
+####
+# Print completion and exit
+####
+print("Completed!")
+raise SystemExit(0)  # same exit status as exit(0), without relying on the site module's interactive helper
\ No newline at end of file