Updated way to get kernel

This commit is contained in:
André R. Brodtkorb 2018-08-09 16:46:00 +02:00
parent 4da6fd043d
commit 426b8dba5c
8 changed files with 53 additions and 38 deletions

View File

@@ -95,25 +95,16 @@ class CudaContext(object):
self.cuda_context.detach() self.cuda_context.detach()
def hash_kernel(kernel_filename, include_dirs, verbose=False):
"""
Reads a text file and creates an OpenCL kernel from that
"""
def get_kernel(self, kernel_filename, block_width, block_height, include_dirs=[], verbose=False):
if (verbose):
print("Compiling " + kernel_filename)
# Generate a kernel ID for our cache # Generate a kernel ID for our cache
module_path = os.path.dirname(os.path.realpath(__file__))
num_includes = 0 num_includes = 0
max_includes = 100 max_includes = 100
include_dirs = include_dirs + [module_path] kernel_hash = ""
kernel_hash = kernel_filename + "_" + str(block_width) + "x" + str(block_height)
with Timer("compiler", verbose=False) as timer: with Timer("compiler", verbose=False) as timer:
# Loop over file and includes, and check if something has changed # Loop over file and includes, and check if something has changed
files = [os.path.join(module_path, kernel_filename)] files = [kernel_filename]
while len(files): while len(files):
if (num_includes > max_includes): if (num_includes > max_includes):
@@ -153,10 +144,32 @@ class CudaContext(object):
if (verbose): if (verbose):
print("`-> Hashed in " + str(timer.secs) + " seconds") print("`-> Hashed in " + str(timer.secs) + " seconds")
return kernel_hash
"""
Reads a text file and creates a CUDA kernel from that
"""
def get_prepared_kernel(self, kernel_filename, kernel_function_name, \
prepared_call_args, \
block_width, block_height, \
include_dirs=[], verbose=False, no_extern_c=False):
if (verbose):
print("Getting " + kernel_filename)
# Create a hash of the kernel (and its includes)
module_path = os.path.dirname(os.path.realpath(__file__))
kernel_hash = kernel_filename \
+ "_" + str(block_width) + "x" + str(block_height) \
+ CudaContext.hash_kernel( \
os.path.join(module_path, kernel_filename), \
include_dirs=[module_path] + include_dirs, \
verbose=verbose)
# Recompile kernel if file or includes have changed # Recompile kernel if file or includes have changed
if (kernel_hash not in self.kernels.keys()): if (kernel_hash not in self.kernels.keys()):
if (verbose): if (verbose):
print("`-> Kernel not in hash => compiling " + kernel_filename) print("`-> Kernel changed or not in hash => compiling " + kernel_filename)
#Create define string #Create define string
define_string = "#define block_width " + str(block_width) + "\n" define_string = "#define block_width " + str(block_width) + "\n"
@@ -165,12 +178,14 @@ class CudaContext(object):
kernel_string = define_string + '#include "' + os.path.join(module_path, kernel_filename) + '"' kernel_string = define_string + '#include "' + os.path.join(module_path, kernel_filename) + '"'
with Timer("compiler", verbose=False) as timer: with Timer("compiler", verbose=False) as timer:
self.kernels[kernel_hash] = cuda_compiler.SourceModule(kernel_string, include_dirs=include_dirs) self.kernels[kernel_hash] = cuda_compiler.SourceModule(kernel_string, include_dirs=include_dirs, no_extern_c=no_extern_c)
if (verbose): if (verbose):
print("`-> Compiled in " + str(timer.secs) + " seconds") print("`-> Compiled in " + str(timer.secs) + " seconds")
return self.kernels[kernel_hash] kernel = self.kernels[kernel_hash].get_function(kernel_function_name)
kernel.prepare(prepared_call_args)
return kernel
""" """
Clears the kernel cache (useful for debugging & development) Clears the kernel cache (useful for debugging & development)

View File

@@ -68,9 +68,9 @@ class FORCE (Simulator.BaseSimulator):
block_width, block_height); block_width, block_height);
#Get kernels #Get kernels
self.module = context.get_kernel("FORCE_kernel.cu", block_width, block_height) self.kernel = context.get_prepared_kernel("FORCE_kernel.cu", "FORCEKernel", \
self.kernel = self.module.get_function("FORCEKernel") "iiffffPiPiPiPiPiPi", \
self.kernel.prepare("iiffffPiPiPiPiPiPi") block_width, block_height)
def __str__(self): def __str__(self):
return "First order centered" return "First order centered"

View File

@@ -63,9 +63,9 @@ class HLL (Simulator.BaseSimulator):
block_width, block_height); block_width, block_height);
#Get kernels #Get kernels
self.module = context.get_kernel("HLL_kernel.cu", block_width, block_height) self.kernel = context.get_prepared_kernel("HLL_kernel.cu", "HLLKernel", \
self.kernel = self.module.get_function("HLLKernel") "iiffffPiPiPiPiPiPi", \
self.kernel.prepare("iiffffPiPiPiPiPiPi") block_width, block_height)
def __str__(self): def __str__(self):
return "Harten-Lax-van Leer" return "Harten-Lax-van Leer"

View File

@@ -69,9 +69,9 @@ class HLL2 (Simulator.BaseSimulator):
self.theta = np.float32(theta) self.theta = np.float32(theta)
#Get kernels #Get kernels
self.module = context.get_kernel("HLL2_kernel.cu", block_width, block_height) self.kernel = context.get_prepared_kernel("HLL2_kernel.cu", "HLL2Kernel", \
self.kernel = self.module.get_function("HLL2Kernel") "iifffffiPiPiPiPiPiPi", \
self.kernel.prepare("iifffffiPiPiPiPiPiPi") block_width, block_height)
def __str__(self): def __str__(self):
return "Harten-Lax-van Leer (2nd order)" return "Harten-Lax-van Leer (2nd order)"

View File

@@ -72,9 +72,9 @@ class KP07 (Simulator.BaseSimulator):
self.r = np.float32(r) self.r = np.float32(r)
#Get kernels #Get kernels
self.module = context.get_kernel("KP07_kernel.cu", block_width, block_height) self.kernel = context.get_prepared_kernel("KP07_kernel.cu", "KP07Kernel", \
self.kernel = self.module.get_function("KP07Kernel") "iiffffffiPiPiPiPiPiPi", \
self.kernel.prepare("iiffffffiPiPiPiPiPiPi") block_width, block_height)
def __str__(self): def __str__(self):
return "Kurganov-Petrova 2007" return "Kurganov-Petrova 2007"

View File

@@ -70,9 +70,9 @@ class KP07_dimsplit (Simulator.BaseSimulator):
self.theta = np.float32(theta) self.theta = np.float32(theta)
#Get kernels #Get kernels
self.module = context.get_kernel("KP07_dimsplit_kernel.cu", block_width, block_height) self.kernel = context.get_prepared_kernel("KP07_dimsplit_kernel.cu", "KP07DimsplitKernel", \
self.kernel = self.module.get_function("KP07DimsplitKernel") "iifffffiPiPiPiPiPiPi", \
self.kernel.prepare("iifffffiPiPiPiPiPiPi") block_width, block_height)
def __str__(self): def __str__(self):
return "Kurganov-Petrova 2007 dimensionally split" return "Kurganov-Petrova 2007 dimensionally split"

View File

@@ -64,9 +64,9 @@ class LxF (Simulator.BaseSimulator):
block_width, block_height); block_width, block_height);
# Get kernels # Get kernels
self.module = context.get_kernel("LxF_kernel.cu", block_width, block_height) self.kernel = context.get_prepared_kernel("LxF_kernel.cu", "LxFKernel", \
self.kernel = self.module.get_function("LxFKernel") "iiffffPiPiPiPiPiPi", \
self.kernel.prepare("iiffffPiPiPiPiPiPi") block_width, block_height)
def __str__(self): def __str__(self):
return "Lax Friedrichs" return "Lax Friedrichs"

View File

@@ -63,9 +63,9 @@ class WAF (Simulator.BaseSimulator):
block_width, block_height); block_width, block_height);
#Get kernels #Get kernels
self.module = context.get_kernel("WAF_kernel.cu", block_width, block_height) self.kernel = context.get_prepared_kernel("WAF_kernel.cu", "WAFKernel", \
self.kernel = self.module.get_function("WAFKernel") "iiffffiPiPiPiPiPiPi", \
self.kernel.prepare("iiffffiPiPiPiPiPiPi") block_width, block_height)
def __str__(self): def __str__(self):
return "Weighted average flux" return "Weighted average flux"