Updated way to get kernel

2025-07-13 02:30:59 +02:00 · 2018-08-09 16:46:00 +02:00 · 2018-08-09 16:46:00 +02:00 · 426b8dba5c
commit 426b8dba5c
parent 4da6fd043d
8 changed files with 53 additions and 38 deletions
--- a/SWESimulators/Common.py
+++ b/SWESimulators/Common.py
@@ -95,25 +95,16 @@ class CudaContext(object):
        self.cuda_context.detach()
                    
    
-    
-    """
-    Reads a text file and creates an OpenCL kernel from that
-    """
-    def get_kernel(self, kernel_filename, block_width, block_height, include_dirs=[], verbose=False):
-        if (verbose):
-            print("Compiling " + kernel_filename)
-        
+    def hash_kernel(kernel_filename, include_dirs, verbose=False):        
        # Generate a kernel ID for our cache
-        module_path = os.path.dirname(os.path.realpath(__file__))
        
        num_includes = 0
        max_includes = 100
-        include_dirs = include_dirs + [module_path]
-        kernel_hash = kernel_filename + "_" + str(block_width) + "x" + str(block_height)
+        kernel_hash = ""
        
        with Timer("compiler", verbose=False) as timer:
            # Loop over file and includes, and check if something has changed
-            files = [os.path.join(module_path, kernel_filename)]
+            files = [kernel_filename]
            while len(files):
            
                if (num_includes > max_includes):
@@ -153,10 +144,32 @@ class CudaContext(object):
        if (verbose):
            print("`-> Hashed in " + str(timer.secs) + " seconds")
            
+        return kernel_hash
+    
+    """
+    Reads a text file and creates a CUDA kernel from that
+    """
+    def get_prepared_kernel(self, kernel_filename, kernel_function_name, \
+                    prepared_call_args, \
+                    block_width, block_height, \
+                    include_dirs=[], verbose=False, no_extern_c=False):
+        
+        if (verbose):
+            print("Getting " + kernel_filename)
+            
+        # Create a hash of the kernel (and its includes)
+        module_path = os.path.dirname(os.path.realpath(__file__))
+        kernel_hash = kernel_filename \
+                + "_" + str(block_width) + "x" + str(block_height) \
+                + CudaContext.hash_kernel( \
+                    os.path.join(module_path, kernel_filename), \
+                    include_dirs=[module_path] + include_dirs, \
+                    verbose=verbose)
+        
        # Recompile kernel if file or includes have changed
        if (kernel_hash not in self.kernels.keys()):
            if (verbose):
-                print("`-> Kernel not in hash => compiling " + kernel_filename)
+                print("`-> Kernel changed or not in hash => compiling " + kernel_filename)
                
            #Create define string
            define_string = "#define block_width " + str(block_width) + "\n"
@@ -165,12 +178,14 @@ class CudaContext(object):
            kernel_string = define_string + '#include "' + os.path.join(module_path, kernel_filename) + '"'
            
            with Timer("compiler", verbose=False) as timer:
-                self.kernels[kernel_hash] = cuda_compiler.SourceModule(kernel_string, include_dirs=include_dirs)
+                self.kernels[kernel_hash] = cuda_compiler.SourceModule(kernel_string, include_dirs=include_dirs, no_extern_c=no_extern_c)
            if (verbose):
                print("`-> Compiled in " + str(timer.secs) + " seconds")
            
        
-        return self.kernels[kernel_hash]
+        kernel = self.kernels[kernel_hash].get_function(kernel_function_name)
+        kernel.prepare(prepared_call_args)
+        return kernel
    
    """
    Clears the kernel cache (useful for debugging & development)
--- a/SWESimulators/FORCE.py
+++ b/SWESimulators/FORCE.py
@@ -68,9 +68,9 @@ class FORCE (Simulator.BaseSimulator):
            block_width, block_height);

        #Get kernels
-        self.module = context.get_kernel("FORCE_kernel.cu", block_width, block_height)
-        self.kernel = self.module.get_function("FORCEKernel")
-        self.kernel.prepare("iiffffPiPiPiPiPiPi")
+        self.kernel = context.get_prepared_kernel("FORCE_kernel.cu", "FORCEKernel", \
+                                        "iiffffPiPiPiPiPiPi", \
+                                        block_width, block_height)
    
    def __str__(self):
        return "First order centered"
--- a/SWESimulators/HLL.py
+++ b/SWESimulators/HLL.py
@@ -63,9 +63,9 @@ class HLL (Simulator.BaseSimulator):
            block_width, block_height);

        #Get kernels
-        self.module = context.get_kernel("HLL_kernel.cu", block_width, block_height)
-        self.kernel = self.module.get_function("HLLKernel")
-        self.kernel.prepare("iiffffPiPiPiPiPiPi")    
+        self.kernel = context.get_prepared_kernel("HLL_kernel.cu", "HLLKernel", \
+                                    "iiffffPiPiPiPiPiPi", \
+                                    block_width, block_height)
    
    def __str__(self):
        return "Harten-Lax-van Leer"
--- a/SWESimulators/HLL2.py
+++ b/SWESimulators/HLL2.py
@@ -69,9 +69,9 @@ class HLL2 (Simulator.BaseSimulator):
        self.theta = np.float32(theta)

        #Get kernels
-        self.module = context.get_kernel("HLL2_kernel.cu", block_width, block_height)
-        self.kernel = self.module.get_function("HLL2Kernel")
-        self.kernel.prepare("iifffffiPiPiPiPiPiPi")
+        self.kernel = context.get_prepared_kernel("HLL2_kernel.cu", "HLL2Kernel", \
+                                        "iifffffiPiPiPiPiPiPi", \
+                                        block_width, block_height)
        
    def __str__(self):
        return "Harten-Lax-van Leer (2nd order)"
--- a/SWESimulators/KP07.py
+++ b/SWESimulators/KP07.py
@@ -72,9 +72,9 @@ class KP07 (Simulator.BaseSimulator):
        self.r = np.float32(r)

        #Get kernels
-        self.module = context.get_kernel("KP07_kernel.cu", block_width, block_height)
-        self.kernel = self.module.get_function("KP07Kernel")
-        self.kernel.prepare("iiffffffiPiPiPiPiPiPi")
+        self.kernel = context.get_prepared_kernel("KP07_kernel.cu", "KP07Kernel", \
+                                        "iiffffffiPiPiPiPiPiPi", \
+                                        block_width, block_height)
        
    def __str__(self):
        return "Kurganov-Petrova 2007"
--- a/SWESimulators/KP07_dimsplit.py
+++ b/SWESimulators/KP07_dimsplit.py
@@ -70,9 +70,9 @@ class KP07_dimsplit (Simulator.BaseSimulator):
        self.theta = np.float32(theta)

        #Get kernels
-        self.module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height)
-        self.kernel = self.module.get_function("KP07DimsplitKernel")
-        self.kernel.prepare("iifffffiPiPiPiPiPiPi")    
+        self.kernel = context.get_prepared_kernel("KP07_dimsplit_kernel.cu", "KP07DimsplitKernel", \
+                                            "iifffffiPiPiPiPiPiPi", \
+                                            block_width, block_height)
    
    def __str__(self):
        return "Kurganov-Petrova 2007 dimensionally split"
--- a/SWESimulators/LxF.py
+++ b/SWESimulators/LxF.py
@@ -64,9 +64,9 @@ class LxF (Simulator.BaseSimulator):
            block_width, block_height);

        # Get kernels
-        self.module = context.get_kernel("LxF_kernel.cu", block_width, block_height)
-        self.kernel = self.module.get_function("LxFKernel")
-        self.kernel.prepare("iiffffPiPiPiPiPiPi")
+        self.kernel = context.get_prepared_kernel("LxF_kernel.cu", "LxFKernel", \
+                                        "iiffffPiPiPiPiPiPi", \
+                                        block_width, block_height)
        
    def __str__(self):
        return "Lax Friedrichs"
--- a/SWESimulators/WAF.py
+++ b/SWESimulators/WAF.py
@@ -63,9 +63,9 @@ class WAF (Simulator.BaseSimulator):
            block_width, block_height);

        #Get kernels
-        self.module = context.get_kernel("WAF_kernel.cu", block_width, block_height)
-        self.kernel = self.module.get_function("WAFKernel")
-        self.kernel.prepare("iiffffiPiPiPiPiPiPi")
+        self.kernel = context.get_prepared_kernel("WAF_kernel.cu", "WAFKernel", \
+                                        "iiffffiPiPiPiPiPiPi", \
+                                        block_width, block_height)
    
    def __str__(self):
        return "Weighted average flux"