refactor(GPUSimulator): follow PEP 8 style guide

2026-01-06 19:58:44 +01:00 · 2025-02-14 12:40:31 +01:00
parent ce8e834771
commit ef207432db
17 changed files with 286 additions and 354 deletions
--- a/GPUSimulators/Autotuner.py
+++ b/GPUSimulators/Autotuner.py
@@ -28,9 +28,9 @@ from socket import gethostname
 import pycuda.driver as cuda
 from GPUSimulators import Common, Simulator, CudaContext
 class Autotuner:
    def __init__(self, 
                nx=2048, ny=2048, 
@@ -44,7 +44,6 @@ class Autotuner:
        self.block_heights = block_heights
        self.performance = {}
    def benchmark(self, simulator, force=False):
        logger = logging.getLogger(__name__)
@@ -95,13 +94,12 @@ class Autotuner:
        # Save to file
        np.savez_compressed(self.filename, **benchmark_data)
-
+    def get_peak_performance(self, simulator):
        """
        Function which reads a numpy file with autotuning data
        and reports the maximum performance and block size
        """
-    def get_peak_performance(self, simulator):
+
        logger = logging.getLogger(__name__)
        assert issubclass(simulator, Simulator.BaseSimulator)
@@ -141,12 +139,11 @@ class Autotuner:
            raise "Something wrong: Could not get autotuning data!"
            return None
-        
+    def benchmark_single_simulator(simulator, arguments, block_widths, block_heights):
        """
        Runs a set of benchmarks for a single simulator
        """
-    def benchmark_single_simulator(simulator, arguments, block_widths, block_heights):
+
        logger = logging.getLogger(__name__)
        megacells = np.empty((len(block_heights), len(block_widths)))
@@ -168,11 +165,11 @@ class Autotuner:
        return megacells
-            
+    def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
        """
        Runs a benchmark, and returns the number of megacells achieved
        """
-    def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
+
        logger = logging.getLogger(__name__)
        #Initialize simulator
@@ -218,12 +215,11 @@ class Autotuner:
            logger.debug("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed)
            return np.nan
-        
+    def gen_test_data(nx, ny, g):
        """
        Generates test dataset
        """
-    def gen_test_data(nx, ny, g):
+
        width = 100.0
        height = 100.0
        dx = width / float(nx)
@@ -264,10 +260,11 @@ class Autotuner:
        return h, hu, hv, dx, dy, dt
    def sanity_check(variable, bound_min, bound_max):
        """
        Checks that a variable is "sane"
        """
-    def sanity_check(variable, bound_min, bound_max):
+
        maxval = np.amax(variable)
        minval = np.amin(variable)
        if (np.isnan(maxval) 
--- a/GPUSimulators/Common.py
+++ b/GPUSimulators/Common.py
@@ -41,10 +41,6 @@ import pycuda.driver as cuda
 from pycuda.tools import PageLockedMemoryPool
 def safeCall(cmd):
    logger = logging.getLogger(__name__)
    try:
@@ -65,16 +61,20 @@ def safeCall(cmd):
    return stdout
 def getGitHash():
    """
    Returns the output of "git rev-parse HEAD" (the hash of the checked-out
    commit) as reported by safeCall
    """
    git_cmd = ["git", "rev-parse", "HEAD"]
    return safeCall(git_cmd)
 def getGitStatus():
    """
    Returns the output of "git status --porcelain -uno" (machine-readable
    status, untracked files not listed) as reported by safeCall
    """
    git_cmd = ["git", "status", "--porcelain", "-uno"]
    return safeCall(git_cmd)
 def toJson(in_dict, compressed=True):
    """
    Creates JSON string from a dictionary
    """
    logger = logging.getLogger(__name__)
    out_dict = in_dict.copy()
    for key in out_dict:
@@ -89,12 +89,14 @@ def toJson(in_dict, compressed=True):
                out_dict[key] = value
    return json.dumps(out_dict)
 def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names=[], dt=None):
    """
    Runs a simulation, and stores output in netcdf file. Stores the times given in 
    save_times, and saves all of the variables in list save_var_names. Elements in  
    save_var_names can be set to None if you do not want to save them
    """
    profiling_data_sim_runner = { 'start': {}, 'end': {} }
    profiling_data_sim_runner["start"]["t_sim_init"] = 0
    profiling_data_sim_runner["end"]["t_sim_init"] = 0
@@ -208,14 +210,11 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
    return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi
 class Timer(object):
    """
    Class which keeps track of time spent for a section of code
    """
    def __init__(self, tag, log_level=logging.DEBUG):
        self.tag = tag
        self.log_level = log_level
@@ -235,14 +234,12 @@ class Timer(object):
        return time.time() - self.start
 class PopenFileBuffer(object):
    """
    Simple class for holding a set of tempfiles
    for communicating with a subprocess
    """
    def __init__(self):
        self.stdout = tempfile.TemporaryFile(mode='w+t')
        self.stderr = tempfile.TemporaryFile(mode='w+t')
@@ -262,10 +259,12 @@ class PopenFileBuffer(object):
        return cout, cerr
 class IPEngine(object):
    """
    Class for starting IPEngines for MPI processing in IPython
    """
    def __init__(self, n_engines):
        self.logger = logging.getLogger(__name__)
@@ -354,10 +353,6 @@ class IPEngine(object):
            gc.collect()
 class DataDumper(object):
    """
    Simple class for holding a netCDF4 object
@@ -366,6 +361,7 @@ class DataDumper(object):
    with DataDumper("filename") as data:
        ...
    """
    def __init__(self, filename, *args, **kwargs):
        self.logger = logging.getLogger(__name__)
@@ -400,7 +396,6 @@ class DataDumper(object):
        #Log output
        self.logger.info("Initialized " + self.filename)
    def __enter__(self):
        self.logger.info("Opening " + self.filename)
        if (self.args):
@@ -414,7 +409,6 @@ class DataDumper(object):
        self.logger.info("Closing " + self.filename)
        self.ncfile.close()
    def toJson(in_dict):
        out_dict = in_dict.copy()
@@ -430,13 +424,11 @@ class DataDumper(object):
        return json.dumps(out_dict)
 class ProgressPrinter(object):
    """
    Small helper class for reporting progress over a fixed number of steps
    """
    def __init__(self, total_steps, print_every=5):
        self.logger = logging.getLogger(__name__)
        self.start = time.time()
@@ -487,19 +479,16 @@ class ProgressPrinter(object):
        return progressbar
-
+class CudaArray2D:
    """
    Class that holds 2D data 
    """
-class CudaArray2D:
+
    def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32):
        """
        Uploads initial data to the CUDA device
        """
-    def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32):
+
        self.logger =  logging.getLogger(__name__)
        self.nx = nx
        self.ny = ny
@@ -531,16 +520,16 @@ class CudaArray2D:
        self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]])
        #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
    def __del__(self, *args):
        #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
        self.data.gpudata.free()
        self.data = None
    def download(self, stream, cpu_data=None, asynch=False, extent=None):
        """
        Enables downloading data from GPU to Python
        """
-    def download(self, stream, cpu_data=None, asynch=False, extent=None):
+
        if (extent is None):
            x = self.x_halo
            y = self.y_halo
@@ -583,7 +572,6 @@ class CudaArray2D:
        return cpu_data
    def upload(self, stream, cpu_data, extent=None):
        if (extent is None):
            x = self.x_halo
@@ -616,20 +604,16 @@ class CudaArray2D:
        copy(stream)
 """
 Class that holds 2D data 
 """
 class CudaArray3D:
    """
    Class that holds 3D data 
    """
    def __init__(self, stream, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None, dtype=np.float32):
        """
        Uploads initial data to the CUDA device
        """
-    def __init__(self, stream, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None, dtype=np.float32):
+
        self.logger =  logging.getLogger(__name__)
        self.nx = nx
        self.ny = ny
@@ -688,16 +672,16 @@ class CudaArray3D:
        #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
    def __del__(self, *args):
        #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
        self.data.gpudata.free()
        self.data = None
    def download(self, stream, asynch=False):
        """
        Enables downloading data from GPU to Python
        """
-    def download(self, stream, asynch=False):
+
        #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
        #Allocate host memory
        #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
@@ -728,17 +712,11 @@ class CudaArray3D:
        return cpu_data
-        
+class ArakawaA2D:
    """
    A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
    """
-class ArakawaA2D:
+
    def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
        """
        Uploads initial data to the GPU device
--- a/GPUSimulators/CudaContext.py
+++ b/GPUSimulators/CudaContext.py
@@ -19,8 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 import os
 import numpy as np
@@ -38,11 +36,10 @@ import pycuda.driver as cuda
 from GPUSimulators import Autotuner, Common
-
+class CudaContext(object):
    """
    Class which keeps track of the CUDA context and some helper functions
    """
 class CudaContext(object):
    def __init__(self, device=None, context_flags=None, use_cache=True, autotuning=True):
        """
@@ -50,6 +47,7 @@ class CudaContext(object):
        Set device to an id or pci_bus_id to select a specific GPU
        Set context_flags to cuda.ctx_flags.SCHED_BLOCKING_SYNC for a blocking context
        """
        self.use_cache = use_cache
        self.logger =  logging.getLogger(__name__)
        self.modules = {}
@@ -95,7 +93,6 @@ class CudaContext(object):
            self.logger.info("Autotuning enabled. It may take several minutes to run the code the first time: have patience")
            self.autotuner = Autotuner.Autotuner()
    def __del__(self, *args):
        self.logger.info("Cleaning up CUDA context handle <%s>", str(self.cuda_context.handle))
@@ -119,11 +116,9 @@ class CudaContext(object):
        self.logger.debug("<%s> Detaching", str(self.cuda_context.handle))
        self.cuda_context.detach()
    def __str__(self):
        return "CudaContext id " + str(self.cuda_context.handle)
    def hash_kernel(kernel_filename, include_dirs):        
        # Generate a kernel ID for our caches
        num_includes = 0
@@ -171,18 +166,19 @@ class CudaContext(object):
        return kernel_hasher.hexdigest()
    """
    Reads a text file and creates an OpenCL kernel from that
    """
    def get_module(self, kernel_filename, 
                    include_dirs=[], \
                    defines={}, \
                    compile_args={'no_extern_c', True}, jit_compile_args={}):
        """
        Reads a text file and creates a CUDA module from that
        """
        def cuda_compile_message_handler(compile_success_bool, info_str, error_str):
            """
            Helper function to print compilation output
            """
-        def cuda_compile_message_handler(compile_success_bool, info_str, error_str):
+
            self.logger.debug("Compilation returned %s", str(compile_success_bool))
            if info_str:
                self.logger.debug("Info: %s", info_str)
@@ -257,16 +253,18 @@ class CudaContext(object):
            self.modules[kernel_hash] = module
            return module
    def clear_kernel_cache(self):
        """
        Clears the kernel cache (useful for debugging & development)
        """
-    def clear_kernel_cache(self):
+
        self.logger.debug("Clearing cache")
        self.modules = {}
        gc.collect()
    def synchronize(self):
        """
        Synchronizes all streams etc
        """
-    def synchronize(self):
+
        self.cuda_context.synchronize()
--- a/GPUSimulators/EE2D_KP07_dimsplit.py
+++ b/GPUSimulators/EE2D_KP07_dimsplit.py
@@ -27,20 +27,26 @@ import numpy as np
 from pycuda import gpuarray
-        
+class EE2D_KP07_dimsplit (BaseSimulator):
    """
    Class that solves the 2D Euler equations using the dimensionally split KP07 scheme
    """
 class EE2D_KP07_dimsplit (BaseSimulator):
    def __init__(self, 
                 context, 
                 rho, rho_u, rho_v, E, 
                 nx, ny, 
                 dx, dy,  
                 g, 
                 gamma, 
                 theta=1.3, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=8):
        """
        Initialization routine
        Args:
            rho: Density
            rho_u: Momentum along x-axis
            rho_v: Momentum along y-axis
@@ -54,17 +60,6 @@ class EE2D_KP07_dimsplit (BaseSimulator):
            gamma: Gas constant
            p: pressure
        """
    def __init__(self, 
                 context, 
                 rho, rho_u, rho_v, E, 
                 nx, ny, 
                 dx, dy,  
                 g, 
                 gamma, 
                 theta=1.3, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=8):
        # Call super constructor
        super().__init__(context, 
@@ -108,7 +103,6 @@ class EE2D_KP07_dimsplit (BaseSimulator):
        self.dt = min(dt_x, dt_y)
        self.cfl_data.fill(self.dt, stream=self.stream)
    def substep(self, dt, step_number, external=True, internal=True):
            self.substepDimsplit(0.5*dt, step_number, external, internal)
--- a/GPUSimulators/FORCE.py
+++ b/GPUSimulators/FORCE.py
@@ -28,21 +28,24 @@ import numpy as np
 from pycuda import gpuarray
-        
+class FORCE (Simulator.BaseSimulator):
    """
    Class that solves the SW equations 
    """
 class FORCE (Simulator.BaseSimulator):
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
                 nx, ny, 
                 dx, dy, 
                 g, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        """
        Initialization routine
        Args:
            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
@@ -53,15 +56,6 @@ class FORCE (Simulator.BaseSimulator):
            dt: Size of each timestep (90 s)
            g: Gravitational acceleration (9.81 m/s^2)
        """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
                 nx, ny, 
                 dx, dy, 
                 g, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/HLL.py
+++ b/GPUSimulators/HLL.py
@@ -27,17 +27,24 @@ import numpy as np
 from pycuda import gpuarray
-
+class HLL (Simulator.BaseSimulator):
    """
    Class that solves the SW equations using the Harten-Lax -van Leer approximate Riemann solver
    """
 class HLL (Simulator.BaseSimulator):
    def __init__(self, 
                 context,
                 h0, hu0, hv0, 
                 nx, ny, 
                 dx, dy, 
                 g, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        """
        Initialization routine
        Args:
            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
@@ -48,15 +55,6 @@ class HLL (Simulator.BaseSimulator):
            dt: Size of each timestep (90 s)
            g: Gravitational acceleration (9.81 m/s^2)
        """
    def __init__(self, 
                 context,
                 h0, hu0, hv0, 
                 nx, ny, 
                 dx, dy, 
                 g, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/HLL2.py
+++ b/GPUSimulators/HLL2.py
@@ -27,29 +27,11 @@ import numpy as np
 from pycuda import gpuarray
-        
+class HLL2 (Simulator.BaseSimulator):
    """
    Class that solves the SW equations using the Forward-Backward linear scheme
    """
 class HLL2 (Simulator.BaseSimulator):
    """
    Initialization routine
    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational accelleration (9.81 m/s^2)
    """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
@@ -60,6 +42,20 @@ class HLL2 (Simulator.BaseSimulator):
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        """
        Initialization routine
        Args:
            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
            nx: Number of cells along x-axis
            ny: Number of cells along y-axis
            dx: Grid cell spacing along x-axis (20 000 m)
            dy: Grid cell spacing along y-axis (20 000 m)
            dt: Size of each timestep (90 s)
            g: Gravitational acceleration (9.81 m/s^2)
        """
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/KP07.py
+++ b/GPUSimulators/KP07.py
@@ -32,25 +32,11 @@ import numpy as np
 from pycuda import gpuarray
-
+class KP07 (Simulator.BaseSimulator):
    """
    Class that solves the SW equations using the KP07 scheme
    """
 class KP07 (Simulator.BaseSimulator):
    """
    Initialization routine
    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational accelleration (9.81 m/s^2)
    """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
@@ -62,6 +48,20 @@ class KP07 (Simulator.BaseSimulator):
                 order=2,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        """
        Initialization routine
        Args:
            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
            nx: Number of cells along x-axis
            ny: Number of cells along y-axis
            dx: Grid cell spacing along x-axis (20 000 m)
            dy: Grid cell spacing along y-axis (20 000 m)
            dt: Size of each timestep (90 s)
            g: Gravitational acceleration (9.81 m/s^2)
        """
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/KP07_dimsplit.py
+++ b/GPUSimulators/KP07_dimsplit.py
@@ -32,26 +32,11 @@ import numpy as np
 from pycuda import gpuarray
-
+class KP07_dimsplit(Simulator.BaseSimulator):
    """
    Class that solves the SW equations using the dimensionally split KP07 scheme
    """
 class KP07_dimsplit(Simulator.BaseSimulator):
    """
    Initialization routine
    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
    nx: Number of cells along x-axis
    ny: Number of cells along y-axis
    dx: Grid cell spacing along x-axis (20 000 m)
    dy: Grid cell spacing along y-axis (20 000 m)
    dt: Size of each timestep (90 s)
    g: Gravitational accelleration (9.81 m/s^2)
    """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
@@ -62,6 +47,20 @@ class KP07_dimsplit(Simulator.BaseSimulator):
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        """
        Initialization routine
        Args:
            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
            nx: Number of cells along x-axis
            ny: Number of cells along y-axis
            dx: Grid cell spacing along x-axis (20 000 m)
            dy: Grid cell spacing along y-axis (20 000 m)
            dt: Size of each timestep (90 s)
            g: Gravitational acceleration (9.81 m/s^2)
        """
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/LxF.py
+++ b/GPUSimulators/LxF.py
@@ -21,24 +21,31 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 #Import packages we need
-from GPUSimulators import Simulator, Common
+from GPUSimulators import CudaContext, Simulator, Common
 from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
 import numpy as np
 from pycuda import gpuarray
-
+class LxF (Simulator.BaseSimulator):
    """
    Class that solves the SW equations using the Lax Friedrichs scheme
    """
 class LxF (Simulator.BaseSimulator):
    def __init__(self, 
                 context: CudaContext, 
                 h0: float, hu0: float, hv0: float, 
                 nx: int, ny: int, 
                 dx: int, dy: int, 
                 g: float, 
                 cfl_scale: float=0.9,
                 boundary_conditions=BoundaryCondition(),
                 block_width: int=16, block_height: int=16):
        """
        Initialization routine
        Args:
            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
@@ -49,15 +56,6 @@ class LxF (Simulator.BaseSimulator):
            dt: Size of each timestep (90 s)
            g: Gravitational acceleration (9.81 m/s^2)
        """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
                 nx, ny, 
                 dx, dy, 
                 g, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(),
                 block_width=16, block_height=16):
        # Call super constructor
        super().__init__(context, 
@@ -66,7 +64,7 @@ class LxF (Simulator.BaseSimulator):
            boundary_conditions,
            cfl_scale,
            1,
-            block_width, block_height);
+            block_width, block_height)
        self.g = np.float32(g) 
        # Get kernels
@@ -99,6 +97,11 @@ class LxF (Simulator.BaseSimulator):
        self.cfl_data.fill(dt, stream=self.stream)
    def substep(self, dt, step_number):
        """
        Args:
            dt: Size of each timestep (seconds)
        """
        self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, 
                self.nx, self.ny, 
                self.dx, self.dy, dt, 
--- a/GPUSimulators/MPISimulator.py
+++ b/GPUSimulators/MPISimulator.py
@@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 import logging
 from GPUSimulators import Simulator
 import numpy as np
@@ -30,12 +29,12 @@ import pycuda.driver as cuda
 #import nvtx
 class MPIGrid(object):
    """
    Class which represents an MPI grid of nodes. Facilitates easy communication between
    neighboring nodes
    """
    def __init__(self, comm, ndims=2):
        self.logger =  logging.getLogger(__name__)
@@ -144,7 +143,6 @@ class MPIGrid(object):
        return grid
    def gather(self, data, root=0):
        out_data = None
        if (self.comm.rank == root):
@@ -206,6 +204,7 @@ class MPISimulator(Simulator.BaseSimulator):
    """
    Class which handles communication between simulators on different MPI nodes
    """
    def __init__(self, sim, grid):        
        self.profiling_data_mpi = { 'start': {}, 'end': {} }
        self.profiling_data_mpi["start"]["t_mpi_halo_exchange"] = 0
@@ -353,12 +352,12 @@ class MPISimulator(Simulator.BaseSimulator):
        self.logger.debug("Local dt: {:f}, global dt: {:f}".format(local_dt[0], global_dt[0]))
        return global_dt[0]
    def getExtent(self):
        """
        Function which returns the extent of the node with the local
        rank in the grid
        """
        width = self.sim.nx*self.sim.dx
        height = self.sim.ny*self.sim.dy
        i, j = self.grid.getCoordinate()
--- a/GPUSimulators/SHMEMSimulator.py
+++ b/GPUSimulators/SHMEMSimulator.py
@@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 import logging
 from GPUSimulators import Simulator, CudaContext
 import numpy as np
@@ -28,6 +27,7 @@ import pycuda.driver as cuda
 import time
 class SHMEMSimulator(Simulator.BaseSimulator):
    """
    Class which handles communication and synchronization between simulators in different 
--- a/GPUSimulators/SHMEMSimulatorGroup.py
+++ b/GPUSimulators/SHMEMSimulatorGroup.py
@@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 import logging
 from GPUSimulators import Simulator, CudaContext
 import numpy as np
@@ -28,6 +27,7 @@ import pycuda.driver as cuda
 import time
 class SHMEMGrid(object):
    """
    Class which represents an SHMEM grid of GPUs. Facilitates easy communication between
@@ -156,6 +156,7 @@ class SHMEMGrid(object):
        return grid
 class SHMEMSimulatorGroup(object):
    """
    Class which handles communication and synchronization between simulators in different 
@@ -278,7 +279,6 @@ class SHMEMSimulatorGroup(object):
        self.logger.debug("Initialized {:d} subdomains".format(len(self.sims)))
    def substep(self, dt, step_number):
        self.exchange()
--- a/GPUSimulators/Simulator.py
+++ b/GPUSimulators/Simulator.py
@@ -29,10 +29,7 @@ import pycuda.compiler as cuda_compiler
 import pycuda.gpuarray
 import pycuda.driver as cuda
-from GPUSimulators import Common
+from GPUSimulators import Common, CudaContext
 class BoundaryCondition(object):    
@@ -40,12 +37,12 @@ class BoundaryCondition(object):
    Class for holding boundary conditions for global boundaries
    """
    class Type(IntEnum):
        """
        Enum that describes the different types of boundary conditions
        WARNING: MUST MATCH THAT OF common.h IN CUDA
        """
        Dirichlet = 0,
        Neumann = 1,
        Periodic = 2,
@@ -60,6 +57,7 @@ class BoundaryCondition(object):
        """
        Constructor
        """
        self.north = types['north']
        self.south = types['south']
        self.east = types['east']
@@ -74,11 +72,11 @@ class BoundaryCondition(object):
    def __str__(self):
        return  '[north={:s}, south={:s}, east={:s}, west={:s}]'.format(str(self.north), str(self.south), str(self.east), str(self.west))
    def asCodedInt(self):
        """
        Helper function which packs four boundary conditions into one integer
        """
        bc = 0
        bc = bc | (self.north & 0x0000000F) << 24
        bc = bc | (self.south & 0x0000000F) << 16
@@ -100,24 +98,20 @@ class BoundaryCondition(object):
        return types
 class BaseSimulator(object):
    def __init__(self, 
-                 context, 
+                 context: CudaContext, 
-                 nx, ny, 
+                 nx: int, ny: int, 
-                 dx, dy, 
+                 dx: int, dy: int, 
-                 boundary_conditions,
+                 boundary_conditions: BoundaryCondition,
-                 cfl_scale,
+                 cfl_scale: float,
-                 num_substeps,
+                 num_substeps: int,
-                 block_width, block_height):
+                 block_width: int, block_height: int):
        """
        Initialization routine
        Args:
            context: GPU context to use
            kernel_wrapper: wrapper function of GPU kernel
            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
@@ -131,6 +125,7 @@ class BaseSimulator(object):
            cfl_scale: Courant number
            num_substeps: Number of substeps to perform for a full step
        """
        #Get logger
        self.logger = logging.getLogger(__name__ + "." + self.__class__.__name__)
@@ -147,7 +142,7 @@ class BaseSimulator(object):
        self.num_substeps = num_substeps
        #Handle autotuning block size
-        if (self.context.autotuner):
+        if self.context.autotuner:
            peak_configuration = self.context.autotuner.get_peak_performance(self.__class__)
            block_width = int(peak_configuration["block_width"])
            block_height = int(peak_configuration["block_height"])
@@ -168,11 +163,9 @@ class BaseSimulator(object):
        self.t = 0.0
        self.nt = 0
    def __str__(self):
        return "{:s} [{:d}x{:d}]".format(self.__class__.__name__, self.nx, self.ny)
    def simulate(self, t, dt=None):
        """ 
        Function which simulates t_end seconds using the step function
@@ -216,11 +209,14 @@ class BaseSimulator(object):
                    e.args += ("Step={:d}, time={:f}".format(self.simSteps(), self.simTime()),)
                    raise
-
+    def step(self, dt: int):
    def step(self, dt):
        """
        Function which performs one single timestep of size dt
        Args:
            dt: Size of each timestep (seconds)
        """
        for i in range(self.num_substeps):
            self.substep(dt, i)
@@ -253,6 +249,7 @@ class BaseSimulator(object):
        """
        Function which performs one single substep with stepsize dt
        """
        raise(NotImplementedError("Needs to be implemented in subclass"))
    def getOutput(self):
@@ -266,21 +263,11 @@ class BaseSimulator(object):
        raise(NotImplementedError("Needs to be implemented in subclass"))
 def stepOrderToCodedInt(step, order):
    """
    Helper function which packs the step and order into a single integer
    """
    step_order = (step << 16) | (order & 0x0000ffff)
    #print("Step:  {0:032b}".format(step))
    #print("Order: {0:032b}".format(order))
--- a/GPUSimulators/WAF.py
+++ b/GPUSimulators/WAF.py
@@ -28,15 +28,24 @@ import numpy as np
 from pycuda import gpuarray
-
+class WAF (Simulator.BaseSimulator):
    """
    Class that solves the SW equations using the Weighted Average Flux (WAF) scheme
    """
 class WAF (Simulator.BaseSimulator):
    def __init__(self, 
                 context,
                 h0, hu0, hv0, 
                 nx, ny, 
                 dx, dy, 
                 g, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        """
        Initialization routine
        Args:
            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
@@ -47,15 +56,6 @@ class WAF (Simulator.BaseSimulator):
            dt: Size of each timestep (90 s)
            g: Gravitational acceleration (9.81 m/s^2)
        """
    def __init__(self, 
                 context,
                 h0, hu0, hv0, 
                 nx, ny, 
                 dx, dy, 
                 g, 
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/helpers/InitialConditions.py
+++ b/GPUSimulators/helpers/InitialConditions.py
@@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 from GPUSimulators.Simulator import BoundaryCondition
 import numpy as np
 import gc
@@ -73,10 +72,7 @@ def downsample(highres_solution, x_factor, y_factor=None):
    return highres_solution.reshape([int(ny), int(y_factor), int(nx), int(x_factor)]).mean(3).mean(1)
-
+def bump(nx: int, ny: int, width: int, height: int, 
 def bump(nx, ny, width, height, 
        bump_size=None, 
        ref_nx=None, ref_ny=None,
        x_center=0.5, y_center=0.5,
@@ -190,11 +186,6 @@ def genShockBubble(nx, ny, gamma, grid=None):
    return arguments
 def genKelvinHelmholtz(nx, ny, gamma, roughness=0.125, grid=None, index=None):
    """
    Roughness parameter in (0, 1.0] determines how "squiggly" 
@@ -205,6 +196,7 @@ def genKelvinHelmholtz(nx, ny, gamma, roughness=0.125, grid=None, index=None):
        """
        Generates the zones of the two fluids of K-H
        """
        zone = np.zeros((ny, nx), dtype=np.int32)
@@ -300,11 +292,11 @@ def genKelvinHelmholtz(nx, ny, gamma, roughness=0.125, grid=None, index=None):
    return arguments
 def genRayleighTaylor(nx, ny, gamma, version=0, grid=None):
    """
    Generates Rayleigh-Taylor instability case
    """
    width = 0.5
    height = 1.5
    g = 0.1
--- a/GPUSimulators/helpers/Visualization.py
+++ b/GPUSimulators/helpers/Visualization.py
@@ -19,14 +19,11 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 import numpy as np
 from matplotlib.colors import Normalize
 def genSchlieren(rho):
    #Compute length of z-component of normalized gradient vector 
    normal = np.gradient(rho) #[x, y, 1]