Mirror of https://github.com/smyalygames/FiniteVolumeGPU.git, synced 2025-05-18 14:34:13 +02:00
Merge pull request #6 from babrodtk/autotuning

Updated domain size benchmark in autotuning

Commit 1ff3485918
Autotuning.ipynb (281 changes)
File diff suppressed because one or more lines are too long
@@ -29,7 +29,7 @@ from socket import gethostname
 import pycuda.driver as cuda
 
 
-from GPUSimulators import Common, LxF, FORCE, HLL, HLL2, KP07, KP07_dimsplit, WAF
+from GPUSimulators import Common, Simulator
 
 class Autotuner:
     def __init__(self,
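The import change above means the autotuning module no longer pulls in every numerical scheme; a benchmark can instead receive the simulator class it should time. Below is a minimal sketch of that pattern, not the repository's actual code: the helper name, the sim_args dictionary, and the simulate()/block_width/block_height interface are illustrative assumptions.

import time

def benchmark_block_sizes(simulator_class, sim_args, block_sizes, t_end=0.1):
    """Hypothetical helper: construct and time one simulator per candidate block size."""
    timings = {}
    for (block_width, block_height) in block_sizes:
        # Assumption: the simulator constructor accepts block_width/block_height keyword arguments.
        sim = simulator_class(**sim_args, block_width=block_width, block_height=block_height)
        start = time.time()
        sim.simulate(t_end)  # assumed interface: advance the solution to t_end
        timings[(block_width, block_height)] = time.time() - start
    return timings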
@@ -100,7 +100,7 @@ class CudaContext(object):
         self.cache_path = os.path.join(self.module_path, "cuda_cache")
         if not os.path.isdir(self.cache_path):
             os.mkdir(self.cache_path)
-        self.logger.debug("Using CUDA cache dir %s", self.cache_path)
+        self.logger.info("Using CUDA cache dir %s", self.cache_path)
 
         self.autotuner = None
         if (autotuning):
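Promoting the message from debug to info makes the cache location visible under a typical INFO-level logging setup. A small self-contained illustration; the logger name and path below are placeholders, not the repository's values:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("GPUSimulators.Common")     # placeholder logger name
logger.debug("Using CUDA cache dir %s", "cuda_cache")  # suppressed at INFO level
logger.info("Using CUDA cache dir %s", "cuda_cache")   # emitted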
@@ -395,6 +395,7 @@ class SWEDataArakawaA:
     Uploads initial data to the CL device
     """
     def __init__(self, stream, nx, ny, halo_x, halo_y, h0, hu0, hv0):
+        self.logger = logging.getLogger(__name__)
         self.h0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, h0)
         self.hu0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hu0)
         self.hv0 = CudaArray2D(stream, nx, ny, halo_x, halo_y, hv0)
@@ -31,33 +31,43 @@ from GPUSimulators import Common
 @magics_class
 class MyIPythonMagic(Magics):
     @line_magic
-    def cuda_context_handler(self, context_name):
+    @magic_arguments.magic_arguments()
+    @magic_arguments.argument(
+        'name', type=str, help='Name of context to create')
+    @magic_arguments.argument(
+        '--blocking', '-b', action="store_true", help='Enable blocking context')
+    @magic_arguments.argument(
+        '--no_cache', '-nc', action="store_true", help='Disable caching of kernels')
+    @magic_arguments.argument(
+        '--no_autotuning', '-na', action="store_true", help='Disable autotuning of kernels')
+    def cuda_context_handler(self, line):
+        args = magic_arguments.parse_argstring(self.cuda_context_handler, line)
         self.logger = logging.getLogger(__name__)
 
-        self.logger.debug("Registering %s as a global context", context_name)
+        self.logger.info("Registering %s in user workspace", args.name)
 
-        if context_name in self.shell.user_ns.keys():
+        if args.name in self.shell.user_ns.keys():
             self.logger.debug("Context already registered! Ignoring")
             return
         else:
             self.logger.debug("Creating context")
-            #self.shell.ex(context_name + " = Common.CudaContext(blocking=False)")
-            self.shell.user_ns[context_name] = Common.CudaContext(blocking=False)
+            use_cache = False if args.no_cache else True
+            use_autotuning = False if args.no_autotuning else True
+            self.shell.user_ns[args.name] = Common.CudaContext(blocking=args.blocking, use_cache=use_cache, autotuning=use_autotuning)
 
         # this function will be called on exceptions in any cell
         def custom_exc(shell, etype, evalue, tb, tb_offset=None):
-            self.logger.exception("Exception caught: Resetting to CUDA context %s", context_name)
+            self.logger.exception("Exception caught: Resetting to CUDA context %s", args.name)
             while (cuda.Context.get_current() != None):
                 context = cuda.Context.get_current()
                 self.logger.info("Popping <%s>", str(context.handle))
                 cuda.Context.pop()
 
-            if context_name in self.shell.user_ns.keys():
-                self.logger.info("Pushing <%s>", str(self.shell.user_ns[context_name].cuda_context.handle))
-                #self.shell.ex(context_name + ".cuda_context.push()")
-                self.shell.user_ns[context_name].cuda_context.push()
+            if args.name in self.shell.user_ns.keys():
+                self.logger.info("Pushing <%s>", str(self.shell.user_ns[args.name].cuda_context.handle))
+                self.shell.user_ns[args.name].cuda_context.push()
             else:
-                self.logger.error("No CUDA context called %s found (something is wrong)", context_name)
+                self.logger.error("No CUDA context called %s found (something is wrong)", args.name)
                 self.logger.error("CUDA will not work now")
 
         self.logger.debug("==================================================================")
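With argparse-based parsing, the magic now takes a positional context name plus optional flags. A hypothetical notebook usage is sketched below; the %load_ext module path and the variable names my_context/other_context are assumptions, while the flags come from the argument definitions above:

%load_ext GPUSimulators.IPythonMagic            # assumed extension path; adjust to wherever MyIPythonMagic is registered
%cuda_context_handler my_context                # defaults: non-blocking, kernel caching and autotuning enabled
%cuda_context_handler -b -nc -na other_context  # blocking context, kernel cache and autotuning disabled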
@@ -69,6 +69,8 @@ class BaseSimulator:
         self.stream = cuda.Stream()
 
         #Create data by uploading to device
+        free, total = cuda.mem_get_info()
+        self.logger.debug("GPU memory: %d / %d MB available", int(free/(1024*1024)), int(total/(1024*1024)))
         self.data = Common.SWEDataArakawaA(self.stream, \
                 nx, ny, \
                 ghost_cells_x, ghost_cells_y, \
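The two added lines query the device with pycuda's mem_get_info(), which returns free and total device memory in bytes. A minimal standalone sketch of the same report (pycuda.autoinit is used here only to create a context for the example):

import pycuda.autoinit  # creates a default CUDA context for this standalone example
import pycuda.driver as cuda

free, total = cuda.mem_get_info()  # both values are reported in bytes
print("GPU memory: %d / %d MB available" % (int(free / (1024*1024)), int(total / (1024*1024))))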