From 3e401e3fe138ee2688be1300c4ca97e7fd62e853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20R=2E=20Brodtkorb?= Date: Fri, 10 Aug 2018 13:59:52 +0200 Subject: [PATCH] Renamed macro and added iPython magic --- SWESimulators/Common.py | 10 ++-- SWESimulators/FORCE_kernel.cu | 16 +++--- SWESimulators/HLL2_kernel.cu | 22 ++++---- SWESimulators/HLL_kernel.cu | 16 +++--- SWESimulators/IPythonMagic.py | 75 +++++++++++++++++++++++++++ SWESimulators/KP07_dimsplit_kernel.cu | 22 ++++---- SWESimulators/KP07_kernel.cu | 26 +++++----- SWESimulators/LxF_kernel.cu | 18 +++---- SWESimulators/WAF_kernel.cu | 16 +++--- SWESimulators/common.cu | 44 ++++++++-------- SWESimulators/limiters.cu | 12 ++--- 11 files changed, 178 insertions(+), 99 deletions(-) create mode 100644 SWESimulators/IPythonMagic.py diff --git a/SWESimulators/Common.py b/SWESimulators/Common.py index ff249f8..68f92be 100644 --- a/SWESimulators/Common.py +++ b/SWESimulators/Common.py @@ -37,6 +37,7 @@ class Timer(object): Class which keeps track of the CUDA context and some helper functions """ class CudaContext(object): + def __init__(self, verbose=True, blocking=False): self.verbose = verbose self.blocking = blocking @@ -93,7 +94,10 @@ class CudaContext(object): if (self.verbose): print(" `-> <" + str(self.cuda_context.handle) + "> Detaching context") self.cuda_context.detach() - + + + def __str__(self): + return "CudaContext id " + str(self.cuda_context.handle) def hash_kernel(kernel_filename, include_dirs, verbose=False): # Generate a kernel ID for our cache @@ -172,8 +176,8 @@ class CudaContext(object): print("`-> Kernel changed or not in hash => compiling " + kernel_filename) #Create define string - define_string = "#define block_width " + str(block_width) + "\n" - define_string += "#define block_height " + str(block_height) + "\n\n" + define_string = "#define BLOCK_WIDTH " + str(block_width) + "\n" + define_string += "#define BLOCK_HEIGHT " + str(block_height) + "\n\n" kernel_string = define_string + '#include "' + os.path.join(module_path, kernel_filename) + '"' diff --git a/SWESimulators/FORCE_kernel.cu b/SWESimulators/FORCE_kernel.cu index 5cd022a..7fac9ba 100644 --- a/SWESimulators/FORCE_kernel.cu +++ b/SWESimulators/FORCE_kernel.cu @@ -27,8 +27,8 @@ along with this program. If not, see . * Computes the flux along the x axis for all faces */ __device__ -void computeFluxF(float Q[3][block_height+2][block_width+2], - float F[3][block_height+1][block_width+1], +void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], + float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], const float g_, const float dx_, const float dt_) { //Index of thread within block @@ -39,7 +39,7 @@ void computeFluxF(float Q[3][block_height+2][block_width+2], { int j=ty; const int l = j + 1; //Skip ghost cells - for (int i=tx; i. * Computes the flux along the x axis for all faces */ __device__ -void computeFluxF(float Q[3][block_height+4][block_width+4], - float Qx[3][block_height+2][block_width+2], - float F[3][block_height+1][block_width+1], +void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], + float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], + float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], const float g_, const float dx_, const float dt_) { //Index of thread within block const int tx = get_local_id(0); @@ -43,7 +43,7 @@ void computeFluxF(float Q[3][block_height+4][block_width+4], { const int j=ty; const int l = j + 2; //Skip ghost cells - for (int i=tx; i. * Computes the flux along the x axis for all faces */ __device__ -void computeFluxF(float Q[3][block_height+2][block_width+2], - float F[3][block_height+1][block_width+1], +void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], + float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], const float g_) { //Index of thread within block const int tx = get_local_id(0); @@ -40,7 +40,7 @@ void computeFluxF(float Q[3][block_height+2][block_width+2], { const int j=ty; const int l = j + 1; //Skip ghost cells - for (int i=tx; i. +""" + +from IPython.core.magic import line_magic, Magics, magics_class +import pycuda.driver as cuda + + + +@magics_class +class CudaContextHandler(Magics): + @line_magic + def cuda_context_handler(self, context_name): + print("Registering " + context_name + " as a global context") + + if context_name in self.shell.user_ns.keys(): + print("`-> Context already registered! Ignoring") + return + else: + print("`-> Creating context") + self.shell.ex(context_name + " = Common.CudaContext(verbose=True, blocking=False)") + + # this function will be called on exceptions in any cell + def custom_exc(shell, etype, evalue, tb, tb_offset=None): + print("Exception caught: Resetting to CUDA context " + context_name) + while (cuda.Context.get_current() != None): + context = cuda.Context.get_current() + print("`-> popping " + str(context.handle)) + cuda.Context.pop() + + if context_name in self.shell.user_ns.keys(): + print("`-> pushing " + str(self.shell.user_ns[context_name].cuda_context.handle)) + self.shell.ex(context_name + ".cuda_context.push()") + else: + print("No CUDA context called " + context_name + " found (something is wrong)!") + print("CUDA will not work now") + + # still show the error within the notebook, don't just swallow it + shell.showtraceback((etype, evalue, tb), tb_offset=tb_offset) + + # this registers a custom exception handler for the whole current notebook + get_ipython().set_custom_exc((Exception,), custom_exc) + + + # Handle CUDA context when exiting python + import atexit + def exitfunc(): + print("Exitfunc: Resetting CUDA context stack") + while (cuda.Context.get_current() != None): + context = cuda.Context.get_current() + print("`-> popping " + str(context.handle)) + cuda.Context.pop() + atexit.register(exitfunc) + +print("Registering automatic CUDA context handling") +print("(use %cuda_context_handler my_context to create a context called my_context") +ip = get_ipython() +ip.register_magics(CudaContextHandler) diff --git a/SWESimulators/KP07_dimsplit_kernel.cu b/SWESimulators/KP07_dimsplit_kernel.cu index 799e9b8..b37fa65 100644 --- a/SWESimulators/KP07_dimsplit_kernel.cu +++ b/SWESimulators/KP07_dimsplit_kernel.cu @@ -30,9 +30,9 @@ along with this program. If not, see . __device__ -void computeFluxF(float Q[3][block_height+4][block_width+4], - float Qx[3][block_height+2][block_width+2], - float F[3][block_height+1][block_width+1], +void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], + float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], + float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], const float g_, const float dx_, const float dt_) { //Index of thread within block const int tx = get_local_id(0); @@ -41,7 +41,7 @@ void computeFluxF(float Q[3][block_height+4][block_width+4], { int j=ty; const int l = j + 2; //Skip ghost cells - for (int i=tx; i. __device__ -void computeFluxF(float Q[3][block_height+4][block_width+4], - float Qx[3][block_height+2][block_width+2], - float F[3][block_height+1][block_width+1], +void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], + float Qx[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], + float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], const float g_) { //Index of thread within block const int tx = get_local_id(0); @@ -41,7 +41,7 @@ void computeFluxF(float Q[3][block_height+4][block_width+4], { int j=ty; const int l = j + 2; //Skip ghost cells - for (int i=tx; i. * Computes the flux along the x axis for all faces */ __device__ -void computeFluxF(float Q[3][block_height+2][block_width+2], - float F[3][block_height][block_width+1], +void computeFluxF(float Q[3][BLOCK_HEIGHT+2][BLOCK_WIDTH+2], + float F[3][BLOCK_HEIGHT][BLOCK_WIDTH+1], const float g_, const float dx_, const float dt_) { //Index of thread within block const int tx = get_local_id(0); @@ -37,7 +37,7 @@ void computeFluxF(float Q[3][block_height+2][block_width+2], { const int j=ty; const int l = j + 1; //Skip ghost cells - for (int i=tx; i. * Computes the flux along the x axis for all faces */ __device__ -void computeFluxF(float Q[3][block_height+4][block_width+4], - float F[3][block_height+1][block_width+1], +void computeFluxF(float Q[3][BLOCK_HEIGHT+4][BLOCK_WIDTH+4], + float F[3][BLOCK_HEIGHT+1][BLOCK_WIDTH+1], const float g_, const float dx_, const float dt_) { //Index of thread within block const int tx = get_local_id(0); @@ -43,7 +43,7 @@ void computeFluxF(float Q[3][block_height+4][block_width+4], { int j=ty; const int l = j + 2; //Skip ghost cells - for (int i=tx; i