More profiling

2025-10-31 20:17:41 +01:00 · 2022-04-01 13:42:56 +00:00 · 2022-04-01 13:42:56 +00:00 · 124e033ff5
commit 124e033ff5
parent bf8dc33b28
4 changed files with 39 additions and 12 deletions
--- a/GPUSimulators/Common.py
+++ b/GPUSimulators/Common.py
@ -165,11 +165,13 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
                sim.simulate(t_step)
            #Download
            """
            save_vars = sim.download(download_vars)
            #Save to file
            for i, var_name in enumerate(save_var_names):
                ncvars[var_name][k, :] = save_vars[i]
            """
            #Write progress to screen
            print_string = progress_printer.getPrintString(t_end)
@ -178,7 +180,7 @@ def runSimulation(simulator, simulator_args, outfile, save_times, save_var_names
        logger.debug("Simulated to t={:f} in {:d} timesteps (average dt={:f})".format(t_end, sim.simSteps(), sim.simTime() / sim.simSteps()))
-    return outdata.filename   
+    return outdata.filename, sim.profiling_data
--- a/GPUSimulators/MPISimulator.py
+++ b/GPUSimulators/MPISimulator.py
@ -24,6 +24,7 @@ import logging
 from GPUSimulators import Simulator
 import numpy as np
 from mpi4py import MPI
 import time
@ -199,7 +200,14 @@ class MPISimulator(Simulator.BaseSimulator):
    """
    Class which handles communication between simulators on different MPI nodes
    """
-    def __init__(self, sim, grid):
+    def __init__(self, sim, grid):        
        self.profiling_data = { 'start': {}, 'end': {} }
        self.profiling_data["start"]["t_halo_exchange"] = 0
        self.profiling_data["end"]["t_halo_exchange"] = 0
        self.profiling_data["start"]["t_step"] = 0
        self.profiling_data["end"]["t_step"] = 0
        self.profiling_data["n_time_steps"] = 0
        self.profiling_data["start"]["t_mpi_sim_init"] = time.time()
        self.logger =  logging.getLogger(__name__)
        autotuner = sim.context.autotuner
@ -284,11 +292,17 @@ class MPISimulator(Simulator.BaseSimulator):
        self.out_s = np.empty_like(self.in_s)
        self.logger.debug("Simlator rank {:d} initialized on {:s}".format(self.grid.comm.rank, MPI.Get_processor_name()))
-    
+        self.profiling_data["end"]["t_mpi_sim_init"] = time.time()
    def substep(self, dt, step_number):
        """
        Perform one substep: run the halo exchange, then advance the wrapped
        simulator, recording how long each of the two phases took.

        Timings are accumulated in self.profiling_data so that
        profiling_data["end"][key] - profiling_data["start"][key] is the total
        elapsed time in seconds for key in ("t_halo_exchange", "t_step").

        dt: forwarded unchanged to self.sim.substep
        step_number: forwarded unchanged to self.sim.substep
        """
        # Accumulate per-call *durations* into "end" and leave "start" at its
        # initial value. Summing raw epoch timestamps into both entries makes
        # the running totals so large that the float spacing exceeds the
        # durations being measured, so the later end - start is mostly
        # rounding noise.
        # perf_counter() is monotonic, so system clock adjustments cannot
        # produce negative or inflated intervals.
        t_begin = time.perf_counter()
        self.exchange()
        t_exchanged = time.perf_counter()
        self.profiling_data["end"]["t_halo_exchange"] += t_exchanged - t_begin

        self.sim.substep(dt, step_number)
        t_stepped = time.perf_counter()
        self.profiling_data["end"]["t_step"] += t_stepped - t_exchanged

        self.profiling_data["n_time_steps"] += 1
    def getOutput(self):
        """
        Return the result of calling getOutput() on the wrapped simulator
        (self.sim), without modification.
        """
        wrapped = self.sim
        return wrapped.getOutput()
--- a/README.md
+++ b/README.md
@ -6,10 +6,9 @@ Connection and run details for all potential benchmark systems.
 ### OsloMet 2 x Quadro RTX 6000 (VPN necessary)
 Connect:  
 ssh -AX ip-from-webpage  
-  
+
 For Jupyter Notebook:  
-ssh -L 8888:localhost:80 ip-from-webpage  
+Access https://seymour.cs.oslomet.no in browser and open terminal 
 (access localhost:8888 in browser and open terminal)  
 (one time operation) conda env create -f conda_environment.yml  
 conda activate ShallowWaterGPU / choose the "conda:ShallowWaterGPU" kernel in the notebook  
--- a/mpiTesting.py
+++ b/mpiTesting.py
@ -49,8 +49,10 @@ parser.add_argument('--profile', action='store_true') # default: False
 args = parser.parse_args()
 if(args.profile):
    profiling_data = {}
    # profiling: total run time
    t_total_start = time.time()
    t_init_start = time.time()
 # Get MPI COMM to use
@ -109,7 +111,7 @@ nx = args.nx
 ny = args.ny
 gamma = 1.4
-save_times = np.linspace(0, 0.02, 2)
+save_times = np.linspace(0, 0.1, 2)
 outfile = "mpi_out_" + str(MPI.COMM_WORLD.rank) + ".nc"
 save_var_names = ['rho', 'rho_u', 'rho_v', 'E']
@ -118,6 +120,10 @@ arguments['context'] = cuda_context
 arguments['theta'] = 1.2
 arguments['grid'] = grid
 if(args.profile):
    t_init_end = time.time()
    t_init = t_init_end - t_init_start
    profiling_data["t_init"] = t_init
 ####
 # Run simulation
@ -132,12 +138,13 @@ def genSim(grid, **kwargs):
    return sim
-outfile = Common.runSimulation(
+outfile, sim_profiling_data = Common.runSimulation(
    genSim, arguments, outfile, save_times, save_var_names)
 if(args.profile):
    t_total_end = time.time()
    t_total = t_total_end - t_total_start
    profiling_data["t_total"] = t_total
    print("Total run time on rank " + str(MPI.COMM_WORLD.rank) + " is " + str(t_total) + " s")
 # write profiling to json file
@ -148,14 +155,19 @@ if(args.profile and MPI.COMM_WORLD.rank == 0):
        allocated_gpus = int(os.environ["CUDA_VISIBLE_DEVICES"].count(",") + 1)
        profiling_file = "MPI_jobid_" + \
            str(job_id) + "_" + str(allocated_nodes) + "_nodes_and_" + str(allocated_gpus) + "_GPUs_profiling.json"
        profiling_data["outfile"] = outfile
    else:
-        profiling_file = "MPI_test_profiling.json"
+        profiling_file = "MPI_" + str(MPI.COMM_WORLD.size) + "_procs_and_" + str(num_cuda_devices) + "_GPUs_profiling.json"
-    write_profiling_data = {}
+    for stage in sim_profiling_data["start"].keys():
-    write_profiling_data["total"] = t_total
+        profiling_data[stage] = sim_profiling_data["end"][stage] - sim_profiling_data["start"][stage]
    profiling_data["nx"] = nx
    profiling_data["ny"] = ny
    profiling_data["n_time_steps"] = sim_profiling_data["n_time_steps"]
    with open(profiling_file, "w") as write_file:
-        json.dump(write_profiling_data, write_file)
+        json.dump(profiling_data, write_file)
 ####
 # Clean shutdown