mirror of
				https://github.com/smyalygames/FiniteVolumeGPU.git
				synced 2025-10-31 20:27:40 +01:00 
			
		
		
		
	feat: add mpi testing for HIP
This commit is contained in:
		
							parent
							
								
									74398718c2
								
							
						
					
					
						commit
						dff97a1fdf
					
				
							
								
								
									
										197
									
								
								mpi_testing_hip.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										197
									
								
								mpi_testing_hip.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,197 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| """ | ||||
| This python module implements MPI simulations for benchmarking | ||||
| 
 | ||||
| Copyright (C) 2018  SINTEF ICT | ||||
| 
 | ||||
| This program is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| This program is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU General Public License | ||||
| along with this program.  If not, see <http://www.gnu.org/licenses/>. | ||||
| """ | ||||
| 
 | ||||
| import argparse | ||||
| import numpy as np | ||||
| import gc | ||||
| import time | ||||
| import json | ||||
| import logging | ||||
| import os | ||||
| 
 | ||||
| from mpi4py import MPI | ||||
| from hip import hip | ||||
| 
 | ||||
| from GPUSimulators.mpi import MPISimulator, MPIGrid | ||||
| from GPUSimulators.common import run_simulation, get_git_hash, get_git_status, hip_check | ||||
| from GPUSimulators.gpu import KernelContext | ||||
| from GPUSimulators.model import EE2DKP07Dimsplit | ||||
| from GPUSimulators.helpers import initial_conditions as IC | ||||
| 
 | ||||
# Command-line interface. The options previously had no help text, so
# `--help` listed them without any explanation.
parser = argparse.ArgumentParser(description='Strong and weak scaling experiments.')
parser.add_argument(
    '-nx', type=int, default=128,
    help='nx value passed to the Kelvin-Helmholtz initial-condition '
         'generator (default: %(default)s)')
parser.add_argument(
    '-ny', type=int, default=128,
    help='ny value passed to the Kelvin-Helmholtz initial-condition '
         'generator (default: %(default)s)')
parser.add_argument(
    '--profile', action='store_true',  # default: False
    help='record wall-clock timings; rank 0 writes them to a JSON file')

args = parser.parse_args()

if args.profile:
    # profiling_data only exists when --profile is given; every later use
    # of it is guarded by the same flag.
    profiling_data = {}
    # profiling: total run time and initialisation time both start here
    t_total_start = time.time()
    t_init_start = time.time()
| 
 | ||||
# MPI communicator used to derive the per-rank log file name
comm = MPI.COMM_WORLD

####
# Logging: INFO and above on the console, DEBUG and above in a per-rank file
####
log_level_console = logging.INFO
log_level_file = logging.DEBUG
log_filename = f"mpi_{comm.rank}.log"
logger = logging.getLogger('GPUSimulators')
# The logger itself must pass everything the most verbose handler wants to see
logger.setLevel(min(log_level_console, log_level_file))

# Console handler (no formatter set, so records use logging's default layout)
ch = logging.StreamHandler()
ch.setLevel(log_level_console)
logger.addHandler(ch)
logger.info(f"Console logger using level {logging.getLevelName(log_level_console)}")

# File handler with timestamped records
formatter = logging.Formatter('%(asctime)s:%(name)s:%(levelname)s: %(message)s')
fh = logging.FileHandler(log_filename)
fh.setFormatter(formatter)
fh.setLevel(log_level_file)
logger.addHandler(fh)
logger.info(f"File logger using level {logging.getLevelName(log_level_file)} to {log_filename}")
| 
 | ||||
####
# Initialize MPI grid etc
####
logger.info("Creating MPI grid")
grid = MPIGrid(MPI.COMM_WORLD)

####
# Initialize HIP
####
logger.info("Initializing HIP")
local_rank = grid.get_local_rank()
num_hip_devices = hip_check(hip.hipGetDeviceCount())
# Device index is the local rank modulo the number of devices HIP reports,
# so several local ranks may end up sharing one device.
hip_device = local_rank % num_hip_devices
logger.info(f"Process {local_rank} using HIP device {hip_device}")
context = KernelContext(device=hip_device, autotuning=False)
| 
 | ||||
####
# Set initial conditions
####

# Fixed NumPy seed (debugging aid)
np.random.seed(42)

logger.info("Generating initial conditions")
nx, ny = args.nx, args.ny

dt = 0.001
gamma = 1.4

# 21 evenly spaced save times covering [0, 20]
save_times = np.linspace(0, 20, 21)
# One output file per MPI rank
outfile = f"mpi_out_{MPI.COMM_WORLD.rank}.nc"
save_var_names = ['rho', 'rho_u', 'rho_v', 'E']

# Start from the generated Kelvin-Helmholtz arguments, then add the entries
# set explicitly by this script.
arguments = IC.gen_kelvin_helmholtz(nx, ny, gamma, grid=grid)
arguments['context'] = context
arguments['theta'] = 1.2
arguments['grid'] = grid

if args.profile:
    # Time elapsed since the init timer was started after argument parsing
    profiling_data["t_init"] = time.time() - t_init_start

####
# Run simulation
####
logger.info("Running simulation")
| 
 | ||||
| 
 | ||||
| # Helper function to create MPI simulator | ||||
| 
 | ||||
| 
 | ||||
def gen_sim(grid, **kwargs):
    """Create an ``EE2DKP07Dimsplit`` simulator wrapped in an ``MPISimulator``.

    Args:
        grid: Passed as the second argument to ``MPISimulator``.
        **kwargs: Forwarded unchanged to the ``EE2DKP07Dimsplit`` constructor.

    Returns:
        The ``MPISimulator`` instance wrapping the new local simulator.
    """
    return MPISimulator(EE2DKP07Dimsplit(**kwargs), grid)
| 
 | ||||
| 
 | ||||
# The result is unpacked into the (re-bound) output file name and two
# profiling structures that are read when the profiling JSON is written.
outfile, sim_runner_profiling_data, sim_profiling_data = run_simulation(
    gen_sim, arguments, outfile, save_times, save_var_names, dt,
)

if args.profile:
    # Wall-clock time since the total timer was started after argument parsing
    t_total = time.time() - t_total_start
    profiling_data["t_total"] = t_total
    print(f"Total run time on rank {MPI.COMM_WORLD.rank} is {t_total} s")
| 
 | ||||
# Write profiling data to a JSON file (rank 0 only, and only with --profile)
if args.profile and MPI.COMM_WORLD.rank == 0:
    job_id = ""
    if "SLURM_JOB_ID" in os.environ:
        job_id = int(os.environ["SLURM_JOB_ID"])
        allocated_nodes = int(os.environ["SLURM_JOB_NUM_NODES"])
        # ROCR_VISIBLE_DEVICES is read as a comma-separated list. It is not
        # guaranteed to be set for every SLURM job; indexing os.environ
        # directly would raise KeyError here and throw away the profiling
        # data of an already finished run, so fall back to the device count
        # reported by HIP.
        visible_devices = os.environ.get("ROCR_VISIBLE_DEVICES")
        if visible_devices:
            allocated_gpus = visible_devices.count(",") + 1
        else:
            allocated_gpus = num_hip_devices
        profiling_file = (
            f"MPI_jobid_{job_id}_{allocated_nodes}_nodes_and_"
            f"{allocated_gpus}_GPUs_profiling.json"
        )
        # NOTE(review): "outfile" is only recorded for SLURM runs, as in the
        # original code - confirm this is intentional.
        profiling_data["outfile"] = outfile
    else:
        profiling_file = (
            f"MPI_{MPI.COMM_WORLD.size}_procs_and_"
            f"{num_hip_devices}_GPUs_profiling.json"
        )

    # Store one duration (end - start) per stage. The simulation's stages
    # are processed second, so they win if both sources share a stage name.
    for timings in (sim_runner_profiling_data, sim_profiling_data):
        for stage, started in timings["start"].items():
            profiling_data[stage] = timings["end"][stage] - started

    profiling_data["nx"] = nx
    profiling_data["ny"] = ny
    profiling_data["dt"] = dt
    profiling_data["n_time_steps"] = sim_profiling_data["n_time_steps"]

    profiling_data["slurm_job_id"] = job_id
    # Device and process counts are stored as strings (existing output format)
    profiling_data["n_hip_devices"] = str(num_hip_devices)
    profiling_data["n_processes"] = str(MPI.COMM_WORLD.size)
    profiling_data["git_hash"] = get_git_hash()
    profiling_data["git_status"] = get_git_status()

    with open(profiling_file, "w") as write_file:
        json.dump(profiling_data, write_file)
| 
 | ||||
####
# Clean shutdown
####
# Drop this script's references to the kernel context before collecting.
# `arguments` also holds the context (under the 'context' key), so both
# names are cleared. The former `sim = None` / `local_sim = None` lines were
# removed: those names only ever exist as locals inside gen_sim.
context = None
arguments = None
logging.shutdown()
gc.collect()

####
# Print completion and exit
####
print("Completed!")
# `exit()` is injected by the `site` module for interactive use and is not
# guaranteed to exist (e.g. under `python -S`); raise SystemExit directly.
raise SystemExit(0)
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user
	 Anthony Berg
						Anthony Berg