refactor(GPUSimulator): follow PEP 8 style guide

2025-10-31 01:07:40 +01:00 · 2025-02-14 12:40:31 +01:00 · 2025-02-14 12:40:31 +01:00 · ef207432db
commit ef207432db
parent ce8e834771
17 changed files with 286 additions and 354 deletions
--- a/GPUSimulators/Autotuner.py
+++ b/GPUSimulators/Autotuner.py
@ -28,9 +28,9 @@ from socket import gethostname

 import pycuda.driver as cuda

-
 from GPUSimulators import Common, Simulator, CudaContext

+
 class Autotuner:
    def __init__(self, 
                nx=2048, ny=2048, 
@ -44,7 +44,6 @@ class Autotuner:
        self.block_heights = block_heights
        self.performance = {}

-
    def benchmark(self, simulator, force=False):
        logger = logging.getLogger(__name__)
        
@ -95,13 +94,12 @@ class Autotuner:
        # Save to file
        np.savez_compressed(self.filename, **benchmark_data)

-
-            
-    """
-    Function which reads a numpy file with autotuning data
-    and reports the maximum performance and block size
-    """
    def get_peak_performance(self, simulator):
+        """
+        Function which reads a numpy file with autotuning data
+        and reports the maximum performance and block size
+        """
+
        logger = logging.getLogger(__name__)
        
        assert issubclass(simulator, Simulator.BaseSimulator)
@ -140,13 +138,12 @@ class Autotuner:
            #This should never happen
            raise "Something wrong: Could not get autotuning data!"
            return None
-        
-        
-                
-    """
-    Runs a set of benchmarks for a single simulator
-    """
+    
    def benchmark_single_simulator(simulator, arguments, block_widths, block_heights):
+        """
+        Runs a set of benchmarks for a single simulator
+        """
+
        logger = logging.getLogger(__name__)
        
        megacells = np.empty((len(block_heights), len(block_widths)))
@ -168,11 +165,11 @@ class Autotuner:

        return megacells
            
-            
-    """
-    Runs a benchmark, and returns the number of megacells achieved
-    """
    def run_benchmark(simulator, arguments, timesteps=10, warmup_timesteps=2):
+        """
+        Runs a benchmark, and returns the number of megacells achieved
+        """
+
        logger = logging.getLogger(__name__)
        
        #Initialize simulator
@ -218,12 +215,11 @@ class Autotuner:
            logger.debug("%s [%d x %d] failed: gpu elapsed %f", simulator.__name__, arguments["block_width"], arguments["block_height"], gpu_elapsed)
            return np.nan
        
-        
-        
-    """
-    Generates test dataset
-    """
    def gen_test_data(nx, ny, g):
+        """
+        Generates test dataset
+        """
+
        width = 100.0
        height = 100.0
        dx = width / float(nx)
@ -264,10 +260,11 @@ class Autotuner:
        
        return h, hu, hv, dx, dy, dt
        
-    """
-    Checks that a variable is "sane"
-    """
    def sanity_check(variable, bound_min, bound_max):
+        """
+        Checks that a variable is "sane"
+        """
+
        maxval = np.amax(variable)
        minval = np.amin(variable)
        if (np.isnan(maxval) 
--- a/GPUSimulators/Common.py
+++ b/GPUSimulators/Common.py
@ -41,10 +41,6 @@ import pycuda.driver as cuda
 from pycuda.tools import PageLockedMemoryPool


-
-
-
-
 def safeCall(cmd):
    logger = logging.getLogger(__name__)
    try:
@ -65,16 +61,20 @@ def safeCall(cmd):

    return stdout

+
 def getGitHash():
    return safeCall(["git", "rev-parse", "HEAD"])

+
 def getGitStatus():
    return safeCall(["git", "status", "--porcelain", "-uno"])

+
 def toJson(in_dict, compressed=True):
    """
    Creates JSON string from a dictionary
    """
+
    logger = logging.getLogger(__name__)
    out_dict = in_dict.copy()
    for key in out_dict:
@ -89,12 +89,14 @@ def toJson(in_dict, compressed=True):
                out_dict[key] = value
    return json.dumps(out_dict)

+
 def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names=[], dt=None):
    """
    Runs a simulation, and stores output in netcdf file. Stores the times given in 
    save_times, and saves all of the variables in list save_var_names. Elements in  
    save_var_names can be set to None if you do not want to save them
    """
+
    profiling_data_sim_runner = { 'start': {}, 'end': {} }
    profiling_data_sim_runner["start"]["t_sim_init"] = 0
    profiling_data_sim_runner["end"]["t_sim_init"] = 0
@ -208,14 +210,11 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
    return outdata.filename, profiling_data_sim_runner, sim.profiling_data_mpi


-
-
-
-
 class Timer(object):
    """
    Class which keeps track of time spent for a section of code
    """
+
    def __init__(self, tag, log_level=logging.DEBUG):
        self.tag = tag
        self.log_level = log_level
@ -233,16 +232,14 @@ class Timer(object):

    def elapsed(self):
        return time.time() - self.start
-            
-            
-            
-            
+

 class PopenFileBuffer(object):
    """
    Simple class for holding a set of tempfiles
    for communicating with a subprocess
    """
+
    def __init__(self):
        self.stdout = tempfile.TemporaryFile(mode='w+t')
        self.stderr = tempfile.TemporaryFile(mode='w+t')
@ -262,10 +259,12 @@ class PopenFileBuffer(object):

        return cout, cerr

+
 class IPEngine(object):
    """
    Class for starting IPEngines for MPI processing in IPython
    """
+
    def __init__(self, n_engines):
        self.logger = logging.getLogger(__name__)
        
@ -352,10 +351,6 @@ class IPEngine(object):
            self.c_buff = None
        
            gc.collect()
-        
-
-            
-        


 class DataDumper(object):
@ -366,6 +361,7 @@ class DataDumper(object):
    with DataDumper("filename") as data:
        ...
    """
+
    def __init__(self, filename, *args, **kwargs):
        self.logger = logging.getLogger(__name__)
        
@ -400,7 +396,6 @@ class DataDumper(object):
        #Log output
        self.logger.info("Initialized " + self.filename)
        
-        
    def __enter__(self):
        self.logger.info("Opening " + self.filename)
        if (self.args):
@ -414,7 +409,6 @@ class DataDumper(object):
        self.logger.info("Closing " + self.filename)
        self.ncfile.close()
        
-        
    def toJson(in_dict):
        out_dict = in_dict.copy()

@ -428,15 +422,13 @@ class DataDumper(object):
                    out_dict[key] = str(out_dict[key])

        return json.dumps(out_dict)
-        


-        
-        
 class ProgressPrinter(object):
    """
    Small helper class for 
    """
+
    def __init__(self, total_steps, print_every=5):
        self.logger = logging.getLogger(__name__)
        self.start = time.time()
@ -487,19 +479,16 @@ class ProgressPrinter(object):
        return progressbar


-
-
-
-
-
-"""
-Class that holds 2D data 
-"""
 class CudaArray2D:
    """
-    Uploads initial data to the CUDA device
+    Class that holds 2D data 
    """
+
    def __init__(self, stream, nx, ny, x_halo, y_halo, cpu_data=None, dtype=np.float32):
+        """
+        Uploads initial data to the CUDA device
+        """
+
        self.logger =  logging.getLogger(__name__)
        self.nx = nx
        self.ny = ny
@ -531,16 +520,16 @@ class CudaArray2D:
        self.upload(stream, cpu_data, extent=[x, y, cpu_data.shape[1], cpu_data.shape[0]])
        #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
        
-        
    def __del__(self, *args):
        #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
        self.data.gpudata.free()
        self.data = None
-        
-    """
-    Enables downloading data from GPU to Python
-    """
+
    def download(self, stream, cpu_data=None, asynch=False, extent=None):
+        """
+        Enables downloading data from GPU to Python
+        """
+
        if (extent is None):
            x = self.x_halo
            y = self.y_halo
@ -583,7 +572,6 @@ class CudaArray2D:
        
        return cpu_data
        
-        
    def upload(self, stream, cpu_data, extent=None):
        if (extent is None):
            x = self.x_halo
@ -615,21 +603,17 @@ class CudaArray2D:
        
        copy(stream)

-        
-        
-        
-        
-        
-        
-        
-"""
-Class that holds 2D data 
-"""
+
 class CudaArray3D:
    """
-    Uploads initial data to the CL device
+    Class that holds 3D data 
    """
+
    def __init__(self, stream, nx, ny, nz, x_halo, y_halo, z_halo, cpu_data=None, dtype=np.float32):
+        """
+        Uploads initial data to the CUDA device
+        """
+
        self.logger =  logging.getLogger(__name__)
        self.nx = nx
        self.ny = ny
@ -688,16 +672,16 @@ class CudaArray3D:
        
        #self.logger.debug("Buffer <%s> [%dx%d]: Allocated ", int(self.data.gpudata), self.nx, self.ny)
        
-        
    def __del__(self, *args):
        #self.logger.debug("Buffer <%s> [%dx%d]: Releasing ", int(self.data.gpudata), self.nx, self.ny)
        self.data.gpudata.free()
        self.data = None
        
-    """
-    Enables downloading data from GPU to Python
-    """
    def download(self, stream, asynch=False):
+        """
+        Enables downloading data from GPU to Python
+        """
+
        #self.logger.debug("Downloading [%dx%d] buffer", self.nx, self.ny)
        #Allocate host memory
        #cpu_data = cuda.pagelocked_empty((self.ny, self.nx), np.float32)
@ -727,18 +711,12 @@ class CudaArray3D:
        
        return cpu_data

-        
-        
-        
-        
-        
-        
-        
-        
-"""
-A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
-"""
+
 class ArakawaA2D:
+    """
+    A class representing an Arakawa A type (unstaggered, logically Cartesian) grid
+    """
+
    def __init__(self, stream, nx, ny, halo_x, halo_y, cpu_variables):
        """
        Uploads initial data to the GPU device
--- a/GPUSimulators/CudaContext.py
+++ b/GPUSimulators/CudaContext.py
@ -19,8 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """

-
-
 import os

 import numpy as np
@ -38,11 +36,10 @@ import pycuda.driver as cuda
 from GPUSimulators import Autotuner, Common


-
-"""
-Class which keeps track of the CUDA context and some helper functions
-"""
 class CudaContext(object):
+    """
+    Class which keeps track of the CUDA context and some helper functions
+    """
    
    def __init__(self, device=None, context_flags=None, use_cache=True, autotuning=True):
        """
@ -50,6 +47,7 @@ class CudaContext(object):
        Set device to an id or pci_bus_id to select a specific GPU
        Set context_flags to cuda.ctx_flags.SCHED_BLOCKING_SYNC for a blocking context
        """
+
        self.use_cache = use_cache
        self.logger =  logging.getLogger(__name__)
        self.modules = {}
@ -94,7 +92,6 @@ class CudaContext(object):
        if (autotuning):
            self.logger.info("Autotuning enabled. It may take several minutes to run the code the first time: have patience")
            self.autotuner = Autotuner.Autotuner()
-            
    
    def __del__(self, *args):
        self.logger.info("Cleaning up CUDA context handle <%s>", str(self.cuda_context.handle))
@ -119,10 +116,8 @@ class CudaContext(object):
        self.logger.debug("<%s> Detaching", str(self.cuda_context.handle))
        self.cuda_context.detach()
        
-        
    def __str__(self):
        return "CudaContext id " + str(self.cuda_context.handle)
-        
    
    def hash_kernel(kernel_filename, include_dirs):        
        # Generate a kernel ID for our caches
@ -171,18 +166,19 @@ class CudaContext(object):
            
        return kernel_hasher.hexdigest()

-
-    """
-    Reads a text file and creates an OpenCL kernel from that
-    """
    def get_module(self, kernel_filename, 
                    include_dirs=[], \
                    defines={}, \
                    compile_args={'no_extern_c', True}, jit_compile_args={}):
        """
-        Helper function to print compilation output
+        Reads a text file and creates a CUDA kernel module from that
        """
+
        def cuda_compile_message_handler(compile_success_bool, info_str, error_str):
+            """
+            Helper function to print compilation output
+            """
+
            self.logger.debug("Compilation returned %s", str(compile_success_bool))
            if info_str:
                self.logger.debug("Info: %s", info_str)
@ -257,16 +253,18 @@ class CudaContext(object):
            self.modules[kernel_hash] = module
            return module
    
-    """
-    Clears the kernel cache (useful for debugging & development)
-    """
    def clear_kernel_cache(self):
+        """
+        Clears the kernel cache (useful for debugging & development)
+        """
+
        self.logger.debug("Clearing cache")
        self.modules = {}
        gc.collect()
        
-    """
-    Synchronizes all streams etc
-    """
    def synchronize(self):
+        """
+        Synchronizes all streams etc
+        """
+
        self.cuda_context.synchronize()
--- a/GPUSimulators/EE2D_KP07_dimsplit.py
+++ b/GPUSimulators/EE2D_KP07_dimsplit.py
@ -27,33 +27,11 @@ import numpy as np
 from pycuda import gpuarray


-        
-        
-        
-        
-        
-
-
-"""
-Class that solves the SW equations using the Forward-Backward linear scheme
-"""
 class EE2D_KP07_dimsplit (BaseSimulator):
+    """
+    Class that solves the 2D Euler equations using the dimensionally split KP07 scheme
+    """

-    """
-    Initialization routine
-    rho: Density
-    rho_u: Momentum along x-axis
-    rho_v: Momentum along y-axis
-    E: energy
-    nx: Number of cells along x-axis
-    ny: Number of cells along y-axis
-    dx: Grid cell spacing along x-axis
-    dy: Grid cell spacing along y-axis
-    dt: Size of each timestep 
-    g: Gravitational constant
-    gamma: Gas constant
-    p: pressure
-    """
    def __init__(self, 
                 context, 
                 rho, rho_u, rho_v, E, 
@ -65,7 +43,24 @@ class EE2D_KP07_dimsplit (BaseSimulator):
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=8):
-                 
+        """
+        Initialization routine
+
+        Args:
+            rho: Density
+            rho_u: Momentum along x-axis
+            rho_v: Momentum along y-axis
+            E: energy
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis
+            dy: Grid cell spacing along y-axis
+            dt: Size of each timestep 
+            g: Gravitational constant
+            gamma: Gas constant
+            p: pressure
+        """
+                    
        # Call super constructor
        super().__init__(context, 
            nx, ny, 
@ -107,7 +102,6 @@ class EE2D_KP07_dimsplit (BaseSimulator):
        dt_y = np.min(self.dy / (np.abs(rho_v/rho) + np.sqrt(gamma*rho)))
        self.dt = min(dt_x, dt_y)
        self.cfl_data.fill(self.dt, stream=self.stream)
-                        
    
    def substep(self, dt, step_number, external=True, internal=True):
            self.substepDimsplit(0.5*dt, step_number, external, internal)
--- a/GPUSimulators/FORCE.py
+++ b/GPUSimulators/FORCE.py
@ -28,31 +28,11 @@ import numpy as np
 from pycuda import gpuarray


-        
-        
-        
-        
-        
-        
-
-
-"""
-Class that solves the SW equations 
-"""
 class FORCE (Simulator.BaseSimulator):
+    """
+    Class that solves the SW equations 
+    """

-    """
-    Initialization routine
-    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
-    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
-    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
-    nx: Number of cells along x-axis
-    ny: Number of cells along y-axis
-    dx: Grid cell spacing along x-axis (20 000 m)
-    dy: Grid cell spacing along y-axis (20 000 m)
-    dt: Size of each timestep (90 s)
-    g: Gravitational accelleration (9.81 m/s^2)
-    """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
@ -62,6 +42,20 @@ class FORCE (Simulator.BaseSimulator):
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
+        """
+        Initialization routine
+        
+        Args:
+            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
+            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
+            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis (20 000 m)
+            dy: Grid cell spacing along y-axis (20 000 m)
+            dt: Size of each timestep (90 s)
+            g: Gravitational acceleration (9.81 m/s^2)
+        """
                 
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/HLL.py
+++ b/GPUSimulators/HLL.py
@ -27,27 +27,11 @@ import numpy as np
 from pycuda import gpuarray


-
-
-
-
-"""
-Class that solves the SW equations using the Harten-Lax -van Leer approximate Riemann solver
-"""
 class HLL (Simulator.BaseSimulator):
+    """
+    Class that solves the SW equations using the Harten-Lax-van Leer approximate Riemann solver
+    """

-    """
-    Initialization routine
-    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
-    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
-    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
-    nx: Number of cells along x-axis
-    ny: Number of cells along y-axis
-    dx: Grid cell spacing along x-axis (20 000 m)
-    dy: Grid cell spacing along y-axis (20 000 m)
-    dt: Size of each timestep (90 s)
-    g: Gravitational accelleration (9.81 m/s^2)
-    """
    def __init__(self, 
                 context,
                 h0, hu0, hv0, 
@ -57,6 +41,20 @@ class HLL (Simulator.BaseSimulator):
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
+        """
+        Initialization routine
+
+        Args:
+            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
+            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
+            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis (20 000 m)
+            dy: Grid cell spacing along y-axis (20 000 m)
+            dt: Size of each timestep (90 s)
+            g: Gravitational acceleration (9.81 m/s^2)
+        """
                 
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/HLL2.py
+++ b/GPUSimulators/HLL2.py
@ -26,30 +26,12 @@ import numpy as np

 from pycuda import gpuarray

-        
-        
-        
-        
-        

-
-"""
-Class that solves the SW equations using the Forward-Backward linear scheme
-"""
 class HLL2 (Simulator.BaseSimulator):
+    """
+    Class that solves the SW equations using the Forward-Backward linear scheme
+    """

-    """
-    Initialization routine
-    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
-    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
-    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
-    nx: Number of cells along x-axis
-    ny: Number of cells along y-axis
-    dx: Grid cell spacing along x-axis (20 000 m)
-    dy: Grid cell spacing along y-axis (20 000 m)
-    dt: Size of each timestep (90 s)
-    g: Gravitational accelleration (9.81 m/s^2)
-    """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
@ -60,6 +42,20 @@ class HLL2 (Simulator.BaseSimulator):
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
+        """
+        Initialization routine
+
+        Args:
+            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
+            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
+            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis (20 000 m)
+            dy: Grid cell spacing along y-axis (20 000 m)
+            dt: Size of each timestep (90 s)
+            g: Gravitational acceleration (9.81 m/s^2)
+        """
                 
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/KP07.py
+++ b/GPUSimulators/KP07.py
@ -32,25 +32,11 @@ import numpy as np
 from pycuda import gpuarray


-
-
-"""
-Class that solves the SW equations using the Forward-Backward linear scheme
-"""
 class KP07 (Simulator.BaseSimulator):
+    """
+    Class that solves the SW equations using the Forward-Backward linear scheme
+    """

-    """
-    Initialization routine
-    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
-    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
-    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
-    nx: Number of cells along x-axis
-    ny: Number of cells along y-axis
-    dx: Grid cell spacing along x-axis (20 000 m)
-    dy: Grid cell spacing along y-axis (20 000 m)
-    dt: Size of each timestep (90 s)
-    g: Gravitational accelleration (9.81 m/s^2)
-    """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
@ -62,6 +48,20 @@ class KP07 (Simulator.BaseSimulator):
                 order=2,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
+        """
+        Initialization routine
+        
+        Args:
+            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
+            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
+            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis (20 000 m)
+            dy: Grid cell spacing along y-axis (20 000 m)
+            dt: Size of each timestep (90 s)
+            g: Gravitational acceleration (9.81 m/s^2)
+        """
                 
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/KP07_dimsplit.py
+++ b/GPUSimulators/KP07_dimsplit.py
@ -32,26 +32,11 @@ import numpy as np
 from pycuda import gpuarray


-
-
-
-"""
-Class that solves the SW equations using the dimentionally split KP07 scheme
-"""
 class KP07_dimsplit(Simulator.BaseSimulator):
+    """
+    Class that solves the SW equations using the dimensionally split KP07 scheme
+    """

-    """
-    Initialization routine
-    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
-    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
-    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
-    nx: Number of cells along x-axis
-    ny: Number of cells along y-axis
-    dx: Grid cell spacing along x-axis (20 000 m)
-    dy: Grid cell spacing along y-axis (20 000 m)
-    dt: Size of each timestep (90 s)
-    g: Gravitational accelleration (9.81 m/s^2)
-    """
    def __init__(self, 
                 context, 
                 h0, hu0, hv0, 
@ -62,6 +47,20 @@ class KP07_dimsplit(Simulator.BaseSimulator):
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
+        """
+        Initialization routine
+        
+        Args:
+            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
+            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
+            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis (20 000 m)
+            dy: Grid cell spacing along y-axis (20 000 m)
+            dt: Size of each timestep (90 s)
+            g: Gravitational acceleration (9.81 m/s^2)
+        """
                 
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/LxF.py
+++ b/GPUSimulators/LxF.py
@ -21,43 +21,41 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """

 #Import packages we need
-from GPUSimulators import Simulator, Common
+from GPUSimulators import CudaContext, Simulator, Common
 from GPUSimulators.Simulator import BaseSimulator, BoundaryCondition
 import numpy as np

 from pycuda import gpuarray


-
-
-
-
-"""
-Class that solves the SW equations using the Lax Friedrichs scheme
-"""
 class LxF (Simulator.BaseSimulator):
+    """
+    Class that solves the SW equations using the Lax Friedrichs scheme
+    """

-    """
-    Initialization routine
-    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
-    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
-    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
-    nx: Number of cells along x-axis
-    ny: Number of cells along y-axis
-    dx: Grid cell spacing along x-axis (20 000 m)
-    dy: Grid cell spacing along y-axis (20 000 m)
-    dt: Size of each timestep (90 s)
-    g: Gravitational accelleration (9.81 m/s^2)
-    """
    def __init__(self, 
-                 context, 
-                 h0, hu0, hv0, 
-                 nx, ny, 
-                 dx, dy, 
-                 g, 
-                 cfl_scale=0.9,
+                 context: CudaContext, 
+                 h0: float, hu0: float, hv0: float, 
+                 nx: int, ny: int, 
+                 dx: int, dy: int, 
+                 g: float, 
+                 cfl_scale: float=0.9,
                 boundary_conditions=BoundaryCondition(),
-                 block_width=16, block_height=16):
+                 block_width: int=16, block_height: int=16):
+        """
+        Initialization routine
+
+        Args:
+            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
+            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
+            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis (20 000 m)
+            dy: Grid cell spacing along y-axis (20 000 m)
+            dt: Size of each timestep (90 s)
+            g: Gravitational acceleration (9.81 m/s^2)
+        """
                 
        # Call super constructor
        super().__init__(context, 
@ -66,7 +64,7 @@ class LxF (Simulator.BaseSimulator):
            boundary_conditions,
            cfl_scale,
            1,
-            block_width, block_height);
+            block_width, block_height)
        self.g = np.float32(g) 

        # Get kernels
@ -99,6 +97,11 @@ class LxF (Simulator.BaseSimulator):
        self.cfl_data.fill(dt, stream=self.stream)
        
    def substep(self, dt, step_number):
+        """
+        Args:
+            dt: Size of each timestep (seconds)
+        """
+        
        self.kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, 
                self.nx, self.ny, 
                self.dx, self.dy, dt, 
--- a/GPUSimulators/MPISimulator.py
+++ b/GPUSimulators/MPISimulator.py
@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """

-
 import logging
 from GPUSimulators import Simulator
 import numpy as np
@ -30,12 +29,12 @@ import pycuda.driver as cuda
 #import nvtx


-
 class MPIGrid(object):
    """
    Class which represents an MPI grid of nodes. Facilitates easy communication between
    neighboring nodes
    """
+
    def __init__(self, comm, ndims=2):
        self.logger =  logging.getLogger(__name__)
        
@ -144,7 +143,6 @@ class MPIGrid(object):
        
        return grid

-
    def gather(self, data, root=0):
        out_data = None
        if (self.comm.rank == root):
@ -206,6 +204,7 @@ class MPISimulator(Simulator.BaseSimulator):
    """
    Class which handles communication between simulators on different MPI nodes
    """
+
    def __init__(self, sim, grid):        
        self.profiling_data_mpi = { 'start': {}, 'end': {} }
        self.profiling_data_mpi["start"]["t_mpi_halo_exchange"] = 0
@ -353,12 +352,12 @@ class MPISimulator(Simulator.BaseSimulator):
        self.logger.debug("Local dt: {:f}, global dt: {:f}".format(local_dt[0], global_dt[0]))
        return global_dt[0]
        
-        
    def getExtent(self):
        """
        Function which returns the extent of node with rank 
        rank in the grid
        """
+
        width = self.sim.nx*self.sim.dx
        height = self.sim.ny*self.sim.dy
        i, j = self.grid.getCoordinate()
--- a/GPUSimulators/SHMEMSimulator.py
+++ b/GPUSimulators/SHMEMSimulator.py
@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """

-
 import logging
 from GPUSimulators import Simulator, CudaContext
 import numpy as np
@ -28,6 +27,7 @@ import pycuda.driver as cuda

 import time

+
 class SHMEMSimulator(Simulator.BaseSimulator):
    """
    Class which handles communication and synchronization between simulators in different 
--- a/GPUSimulators/SHMEMSimulatorGroup.py
+++ b/GPUSimulators/SHMEMSimulatorGroup.py
@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """

-
 import logging
 from GPUSimulators import Simulator, CudaContext
 import numpy as np
@ -28,6 +27,7 @@ import pycuda.driver as cuda

 import time

+
 class SHMEMGrid(object):
    """
    Class which represents an SHMEM grid of GPUs. Facilitates easy communication between
@ -156,6 +156,7 @@ class SHMEMGrid(object):
        
        return grid

+
 class SHMEMSimulatorGroup(object):
    """
    Class which handles communication and synchronization between simulators in different 
@ -277,7 +278,6 @@ class SHMEMSimulatorGroup(object):
            self.s[i] = np.empty((self.nvars[i], self.read_s[i][3], self.read_s[i][2]), dtype=np.float32)

        self.logger.debug("Initialized {:d} subdomains".format(len(self.sims)))
-    

    def substep(self, dt, step_number):
        self.exchange()
--- a/GPUSimulators/Simulator.py
+++ b/GPUSimulators/Simulator.py
@ -29,23 +29,20 @@ import pycuda.compiler as cuda_compiler
 import pycuda.gpuarray
 import pycuda.driver as cuda

-from GPUSimulators import Common
-
-
-        
+from GPUSimulators import Common, CudaContext


 class BoundaryCondition(object):    
    """
    Class for holding boundary conditions for global boundaries
    """
-    
-    
+
    class Type(IntEnum):
        """
        Enum that describes the different types of boundary conditions
        WARNING: MUST MATCH THAT OF common.h IN CUDA
        """
+
        Dirichlet = 0,
        Neumann = 1,
        Periodic = 2,
@ -60,6 +57,7 @@ class BoundaryCondition(object):
        """
        Constructor
        """
+
        self.north = types['north']
        self.south = types['south']
        self.east = types['east']
@ -74,11 +72,11 @@ class BoundaryCondition(object):
    def __str__(self):
        return  '[north={:s}, south={:s}, east={:s}, west={:s}]'.format(str(self.north), str(self.south), str(self.east), str(self.west))

-        
    def asCodedInt(self):
        """
        Helper function which packs four boundary conditions into one integer
        """
+
        bc = 0
        bc = bc | (self.north & 0x0000000F) << 24
        bc = bc | (self.south & 0x0000000F) << 16
@ -98,39 +96,36 @@ class BoundaryCondition(object):
        types['east']  = BoundaryCondition.Type((bc >>  8) & 0x0000000F)
        types['west']  = BoundaryCondition.Type((bc >>  0) & 0x0000000F)
        return types
-        
-    
-    
-    
-    
-    
-    
-    
+
+
 class BaseSimulator(object):
   
    def __init__(self, 
-                 context, 
-                 nx, ny, 
-                 dx, dy, 
-                 boundary_conditions,
-                 cfl_scale,
-                 num_substeps,
-                 block_width, block_height):
+                 context: CudaContext, 
+                 nx: int, ny: int, 
+                 dx: int, dy: int, 
+                 boundary_conditions: BoundaryCondition,
+                 cfl_scale: float,
+                 num_substeps: int,
+                 block_width: int, block_height: int):
        """
        Initialization routine
-        context: GPU context to use
-        kernel_wrapper: wrapper function of GPU kernel
-        h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
-        hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
-        hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
-        nx: Number of cells along x-axis
-        ny: Number of cells along y-axis
-        dx: Grid cell spacing along x-axis (20 000 m)
-        dy: Grid cell spacing along y-axis (20 000 m)
-        dt: Size of each timestep (90 s)
-        cfl_scale: Courant number
-        num_substeps: Number of substeps to perform for a full step
+        
+        Args:
+            context: GPU context to use
+            kernel_wrapper: wrapper function of GPU kernel
+            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
+            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
+            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis (20 000 m)
+            dy: Grid cell spacing along y-axis (20 000 m)
+            dt: Size of each timestep (90 s)
+            cfl_scale: Courant number
+            num_substeps: Number of substeps to perform for a full step
        """
+
        #Get logger
        self.logger = logging.getLogger(__name__ + "." + self.__class__.__name__)
        
@ -147,7 +142,7 @@ class BaseSimulator(object):
        self.num_substeps = num_substeps
        
        #Handle autotuning block size
-        if (self.context.autotuner):
+        if self.context.autotuner:
            peak_configuration = self.context.autotuner.get_peak_performance(self.__class__)
            block_width = int(peak_configuration["block_width"])
            block_height = int(peak_configuration["block_height"])
@ -167,12 +162,10 @@ class BaseSimulator(object):
        #Keep track of simulation time and number of timesteps
        self.t = 0.0
        self.nt = 0
-        

    def __str__(self):
        return "{:s} [{:d}x{:d}]".format(self.__class__.__name__, self.nx, self.ny)

-
    def simulate(self, t, dt=None):
        """ 
        Function which simulates t_end seconds using the step function
@ -216,11 +209,14 @@ class BaseSimulator(object):
                    e.args += ("Step={:d}, time={:f}".format(self.simSteps(), self.simTime()),)
                    raise

-
-    def step(self, dt):
+    def step(self, dt: int):
        """
        Function which performs one single timestep of size dt
+        
+        Args:
+            dt: Size of each timestep (seconds)
        """
+
        for i in range(self.num_substeps):
            self.substep(dt, i)
            
@ -253,6 +249,7 @@ class BaseSimulator(object):
        """
        Function which performs one single substep with stepsize dt
        """
+
        raise(NotImplementedError("Needs to be implemented in subclass"))
        
    def getOutput(self):
@ -264,23 +261,13 @@ class BaseSimulator(object):
        
    def computeDt(self):
        raise(NotImplementedError("Needs to be implemented in subclass"))
-       
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
-        
+
+
 def stepOrderToCodedInt(step, order):
    """
    Helper function which packs the step and order into a single integer
    """
+
    step_order = (step << 16) | (order & 0x0000ffff)
    #print("Step:  {0:032b}".format(step))
    #print("Order: {0:032b}".format(order))
--- a/GPUSimulators/WAF.py
+++ b/GPUSimulators/WAF.py
@ -28,25 +28,11 @@ import numpy as np
 from pycuda import gpuarray


-
-
-"""
-Class that solves the SW equations using the Forward-Backward linear scheme
-"""
 class WAF (Simulator.BaseSimulator):
+    """
+    Class that solves the SW equations using the Weighted Average Flux (WAF) scheme
+    """

-    """
-    Initialization routine
-    h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
-    hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
-    hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
-    nx: Number of cells along x-axis
-    ny: Number of cells along y-axis
-    dx: Grid cell spacing along x-axis (20 000 m)
-    dy: Grid cell spacing along y-axis (20 000 m)
-    dt: Size of each timestep (90 s)
-    g: Gravitational accelleration (9.81 m/s^2)
-    """
    def __init__(self, 
                 context,
                 h0, hu0, hv0, 
@ -56,6 +42,20 @@ class WAF (Simulator.BaseSimulator):
                 cfl_scale=0.9,
                 boundary_conditions=BoundaryCondition(), 
                 block_width=16, block_height=16):
+        """
+        Initialization routine
+
+        Args:
+            h0: Water depth incl ghost cells, (nx+1)*(ny+1) cells
+            hu0: Initial momentum along x-axis incl ghost cells, (nx+1)*(ny+1) cells
+            hv0: Initial momentum along y-axis incl ghost cells, (nx+1)*(ny+1) cells
+            nx: Number of cells along x-axis
+            ny: Number of cells along y-axis
+            dx: Grid cell spacing along x-axis (20 000 m)
+            dy: Grid cell spacing along y-axis (20 000 m)
+            dt: Size of each timestep (90 s)
+            g: Gravitational acceleration (9.81 m/s^2)
+        """
                 
        # Call super constructor
        super().__init__(context, 
--- a/GPUSimulators/helpers/InitialConditions.py
+++ b/GPUSimulators/helpers/InitialConditions.py
@ -19,7 +19,6 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """

-
 from GPUSimulators.Simulator import BoundaryCondition
 import numpy as np
 import gc
@ -73,10 +72,7 @@ def downsample(highres_solution, x_factor, y_factor=None):
    return highres_solution.reshape([int(ny), int(y_factor), int(nx), int(x_factor)]).mean(3).mean(1)


-
-
-    
-def bump(nx, ny, width, height, 
+def bump(nx: int, ny: int, width: int, height: int, 
        bump_size=None, 
        ref_nx=None, ref_ny=None,
        x_center=0.5, y_center=0.5,
@ -189,12 +185,7 @@ def genShockBubble(nx, ny, gamma, grid=None):
    } 
    return arguments

-    
-    
-    
-    
-    
-    
+
 def genKelvinHelmholtz(nx, ny, gamma, roughness=0.125, grid=None, index=None):
    """
    Roughness parameter in (0, 1.0] determines how "squiggly" 
@ -205,6 +196,7 @@ def genKelvinHelmholtz(nx, ny, gamma, roughness=0.125, grid=None, index=None):
        """
        Generates the zones of the two fluids of K-H
        """
+
        zone = np.zeros((ny, nx), dtype=np.int32)


@ -298,13 +290,13 @@ def genKelvinHelmholtz(nx, ny, gamma, roughness=0.125, grid=None, index=None):
    } 
    
    return arguments
-    
-    
-    
+ 
+
 def genRayleighTaylor(nx, ny, gamma, version=0, grid=None):
    """
    Generates Rayleigh-Taylor instability case
    """
+
    width = 0.5
    height = 1.5
    g = 0.1
--- a/GPUSimulators/helpers/Visualization.py
+++ b/GPUSimulators/helpers/Visualization.py
@ -19,14 +19,11 @@ You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 """

-
-
 import numpy as np

 from matplotlib.colors import Normalize


-
 def genSchlieren(rho):
    #Compute length of z-component of normalized gradient vector 
    normal = np.gradient(rho) #[x, y, 1]